
Commit 6feaaa4

Merge remote-tracking branch 'upstream/master' into SPARK-31365
2 parents: 9659699 + 5ba467c

275 files changed: +9079 / -3777 lines


R/create-rd.sh

Lines changed: 1 addition & 1 deletion
@@ -34,4 +34,4 @@ pushd "$FWDIR" > /dev/null
 . "$FWDIR/find-r.sh"

 # Generate Rd files if devtools is installed
-"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
+"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); setwd("'$FWDIR'"); devtools::document(pkg="./pkg", roclets=c("rd")) }'

R/pkg/R/DataFrame.R

Lines changed: 67 additions & 45 deletions
@@ -1657,9 +1657,7 @@ setMethod("dapplyCollect",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#' column of the SparkDataFrame. The function \code{func} takes as argument
-#' a key - grouping columns and a data frame - a local R data.frame.
-#' The output of \code{func} is a local R data.frame.
+#' column of the SparkDataFrame. See Details.
 #' @param schema the schema of the resulting SparkDataFrame after the function is applied.
 #' The schema must match to output of \code{func}. It has to be defined for each
 #' output column with preferred output column name and corresponding data type.
@@ -1669,29 +1667,43 @@ setMethod("dapplyCollect",
 #' @aliases gapply,SparkDataFrame-method
 #' @rdname gapply
 #' @name gapply
+#' @details
+#' \code{func} is a function of two arguments. The first, usually named \code{key}
+#' (though this is not enforced) corresponds to the grouping key, will be an
+#' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
+#' to the grouping columns' values for the current group.
+#'
+#' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
+#' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
+#'
+#' The output of \code{func} must be a \code{data.frame} matching \code{schema} --
+#' in particular this means the names of the output \code{data.frame} are irrelevant
+#'
 #' @seealso \link{gapplyCollect}
 #' @examples
 #'
 #' \dontrun{
-#' Computes the arithmetic mean of the second column by grouping
-#' on the first and third columns. Output the grouping values and the average.
+#' # Computes the arithmetic mean of the second column by grouping
+#' # on the first and third columns. Output the grouping values and the average.
 #'
 #' df <- createDataFrame (
 #' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
 #' c("a", "b", "c", "d"))
 #'
-#' Here our output contains three columns, the key which is a combination of two
-#' columns with data types integer and string and the mean which is a double.
+#' # Here our output contains three columns, the key which is a combination of two
+#' # columns with data types integer and string and the mean which is a double.
 #' schema <- structType(structField("a", "integer"), structField("c", "string"),
 #' structField("avg", "double"))
 #' result <- gapply(
 #' df,
 #' c("a", "c"),
 #' function(key, x) {
+#' # key will either be list(1L, '1') (for the group where a=1L,c='1') or
+#' # list(3L, '3') (for the group where a=3L,c='3')
 #' y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
 #' }, schema)
 #'
-#' The schema also can be specified in a DDL-formatted string.
+#' # The schema also can be specified in a DDL-formatted string.
 #' schema <- "a INT, c STRING, avg DOUBLE"
 #' result <- gapply(
 #' df,
@@ -1700,8 +1712,8 @@ setMethod("dapplyCollect",
 #' y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
 #' }, schema)
 #'
-#' We can also group the data and afterwards call gapply on GroupedData.
-#' For Example:
+#' # We can also group the data and afterwards call gapply on GroupedData.
+#' # For example:
 #' gdf <- group_by(df, "a", "c")
 #' result <- gapply(
 #' gdf,
@@ -1710,15 +1722,15 @@ setMethod("dapplyCollect",
 #' }, schema)
 #' collect(result)
 #'
-#' Result
-#' ------
-#' a c avg
-#' 3 3 3.0
-#' 1 1 1.5
+#' # Result
+#' # ------
+#' # a c avg
+#' # 3 3 3.0
+#' # 1 1 1.5
 #'
-#' Fits linear models on iris dataset by grouping on the 'Species' column and
-#' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
-#' and 'Petal_Width' as training features.
+#' # Fits linear models on iris dataset by grouping on the 'Species' column and
+#' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
+#' # and 'Petal_Width' as training features.
 #'
 #' df <- createDataFrame (iris)
 #' schema <- structType(structField("(Intercept)", "double"),
@@ -1734,12 +1746,12 @@ setMethod("dapplyCollect",
 #' }, schema)
 #' collect(df1)
 #'
-#' Result
-#' ---------
-#' Model (Intercept) Sepal_Width Petal_Length Petal_Width
-#' 1 0.699883 0.3303370 0.9455356 -0.1697527
-#' 2 1.895540 0.3868576 0.9083370 -0.6792238
-#' 3 2.351890 0.6548350 0.2375602 0.2521257
+#' # Result
+#' # ---------
+#' # Model (Intercept) Sepal_Width Petal_Length Petal_Width
+#' # 1 0.699883 0.3303370 0.9455356 -0.1697527
+#' # 2 1.895540 0.3868576 0.9083370 -0.6792238
+#' # 3 2.351890 0.6548350 0.2375602 0.2521257
 #'
 #'}
 #' @note gapply(SparkDataFrame) since 2.0.0
@@ -1757,20 +1769,30 @@ setMethod("gapply",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#' column of the SparkDataFrame. The function \code{func} takes as argument
-#' a key - grouping columns and a data frame - a local R data.frame.
-#' The output of \code{func} is a local R data.frame.
+#' column of the SparkDataFrame. See Details.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases gapplyCollect,SparkDataFrame-method
 #' @rdname gapplyCollect
 #' @name gapplyCollect
+#' @details
+#' \code{func} is a function of two arguments. The first, usually named \code{key}
+#' (though this is not enforced) corresponds to the grouping key, will be an
+#' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
+#' to the grouping columns' values for the current group.
+#'
+#' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
+#' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
+#'
+#' The output of \code{func} must be a \code{data.frame} matching \code{schema} --
+#' in particular this means the names of the output \code{data.frame} are irrelevant
+#'
 #' @seealso \link{gapply}
 #' @examples
 #'
 #' \dontrun{
-#' Computes the arithmetic mean of the second column by grouping
-#' on the first and third columns. Output the grouping values and the average.
+#' # Computes the arithmetic mean of the second column by grouping
+#' # on the first and third columns. Output the grouping values and the average.
 #'
 #' df <- createDataFrame (
 #' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
@@ -1785,8 +1807,8 @@ setMethod("gapply",
 #' y
 #' })
 #'
-#' We can also group the data and afterwards call gapply on GroupedData.
-#' For Example:
+#' # We can also group the data and afterwards call gapply on GroupedData.
+#' # For example:
 #' gdf <- group_by(df, "a", "c")
 #' result <- gapplyCollect(
 #' gdf,
@@ -1796,15 +1818,15 @@ setMethod("gapply",
 #' y
 #' })
 #'
-#' Result
-#' ------
-#' key_a key_c mean_b
-#' 3 3 3.0
-#' 1 1 1.5
+#' # Result
+#' # ------
+#' # key_a key_c mean_b
+#' # 3 3 3.0
+#' # 1 1 1.5
 #'
-#' Fits linear models on iris dataset by grouping on the 'Species' column and
-#' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
-#' and 'Petal_Width' as training features.
+#' # Fits linear models on iris dataset by grouping on the 'Species' column and
+#' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
+#' # and 'Petal_Width' as training features.
 #'
 #' df <- createDataFrame (iris)
 #' result <- gapplyCollect(
@@ -1816,12 +1838,12 @@ setMethod("gapply",
 #' data.frame(t(coef(m)))
 #' })
 #'
-#' Result
-#'---------
-#' Model X.Intercept. Sepal_Width Petal_Length Petal_Width
-#' 1 0.699883 0.3303370 0.9455356 -0.1697527
-#' 2 1.895540 0.3868576 0.9083370 -0.6792238
-#' 3 2.351890 0.6548350 0.2375602 0.2521257
+#' # Result
+#' # ---------
+#' # Model X.Intercept. Sepal_Width Petal_Length Petal_Width
+#' # 1 0.699883 0.3303370 0.9455356 -0.1697527
+#' # 2 1.895540 0.3868576 0.9083370 -0.6792238
+#' # 3 2.351890 0.6548350 0.2375602 0.2521257
 #'
 #'}
 #' @note gapplyCollect(SparkDataFrame) since 2.0.0

R/pkg/R/functions.R

Lines changed: 0 additions & 1 deletion
@@ -3951,7 +3951,6 @@ setMethod("map_values",
 #' @rdname column_collection_functions
 #' @aliases map_zip_with map_zip_with,characterOrColumn,characterOrColumn,function-method
 #'
-#' @examples
 #' @note map_zip_with since 3.1.0
 setMethod("map_zip_with",
 signature(x = "characterOrColumn", y = "characterOrColumn", f = "function"),

core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 9 additions & 14 deletions
@@ -406,17 +406,10 @@ private void handleFailedDelete() {
 *
 * For efficiency, all calls to `next()` will return the same {@link Location} object.
 *
-* If any other lookups or operations are performed on this map while iterating over it, including
-* `lookup()`, the behavior of the returned iterator is undefined.
+* The returned iterator is thread-safe. However if the map is modified while iterating over it,
+* the behavior of the returned iterator is undefined.
 */
 public MapIterator iterator() {
-return new MapIterator(numValues, loc, false);
-}
-
-/**
-* Returns a thread safe iterator that iterates of the entries of this map.
-*/
-public MapIterator safeIterator() {
 return new MapIterator(numValues, new Location(), false);
 }

@@ -427,19 +420,20 @@ public MapIterator safeIterator() {
 *
 * For efficiency, all calls to `next()` will return the same {@link Location} object.
 *
-* If any other lookups or operations are performed on this map while iterating over it, including
-* `lookup()`, the behavior of the returned iterator is undefined.
+* The returned iterator is thread-safe. However if the map is modified while iterating over it,
+* the behavior of the returned iterator is undefined.
 */
 public MapIterator destructiveIterator() {
 updatePeakMemoryUsed();
-return new MapIterator(numValues, loc, true);
+return new MapIterator(numValues, new Location(), true);
 }

 /**
 * Looks up a key, and return a {@link Location} handle that can be used to test existence
 * and read/write values.
 *
-* This function always return the same {@link Location} instance to avoid object allocation.
+* This function always returns the same {@link Location} instance to avoid object allocation.
+* This function is not thread-safe.
 */
 public Location lookup(Object keyBase, long keyOffset, int keyLength) {
 safeLookup(keyBase, keyOffset, keyLength, loc,
@@ -451,7 +445,8 @@ public Location lookup(Object keyBase, long keyOffset, int keyLength) {
 * Looks up a key, and return a {@link Location} handle that can be used to test existence
 * and read/write values.
 *
-* This function always return the same {@link Location} instance to avoid object allocation.
+* This function always returns the same {@link Location} instance to avoid object allocation.
+* This function is not thread-safe.
 */
 public Location lookup(Object keyBase, long keyOffset, int keyLength, int hash) {
 safeLookup(keyBase, keyOffset, keyLength, loc, hash);
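
Note: the updated Javadoc reflects the behavioural change in this hunk. Both iterator() and destructiveIterator() now build each MapIterator around a fresh Location (what safeIterator() used to provide), while lookup() keeps reusing the shared cursor and is therefore documented as not thread-safe. A rough Scala sketch of that cursor-reuse trade-off, with made-up Table/Cursor/Entry names rather than Spark's actual classes:

// Sketch only: fresh cursor per iterator vs. one shared cursor for lookup().
final case class Entry(key: String, value: Int)

final class Cursor(entries: Vector[Entry]) {
  private var pos = -1
  def moveTo(key: String): Unit = pos = entries.indexWhere(_.key == key)
  def moveToIndex(i: Int): Unit = pos = i
  def found: Boolean = pos >= 0
  def current: Entry = entries(pos)
}

final class Table(entries: Vector[Entry]) {
  // One shared cursor, reused by every lookup() call: no per-call allocation,
  // but two threads calling lookup() concurrently would clobber each other.
  private val sharedCursor = new Cursor(entries)

  def lookup(key: String): Cursor = {
    sharedCursor.moveTo(key)
    sharedCursor
  }

  // Each call builds its own Cursor, so independent iterations do not interfere
  // (the table itself must still not be modified while iterating).
  def iterator(): Iterator[Entry] = {
    val cursor = new Cursor(entries)
    Iterator.tabulate(entries.length) { i =>
      cursor.moveToIndex(i)
      cursor.current
    }
  }
}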

core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@
 #dag-viz-graph .label {
 font-weight: normal;
 text-shadow: none;
+color: #333;
 }

 #dag-viz-graph svg path {

core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 4 additions & 15 deletions
@@ -173,8 +173,8 @@ function renderDagViz(forJob) {
 });

 metadataContainer().selectAll(".barrier-rdd").each(function() {
-var rddId = d3.select(this).text().trim()
-var clusterId = VizConstants.clusterPrefix + rddId
+var rddId = d3.select(this).text().trim();
+var clusterId = VizConstants.clusterPrefix + rddId;
 svg.selectAll("g." + clusterId).classed("barrier", true)
 });

@@ -282,11 +282,7 @@ function renderDagVizForJob(svgContainer) {

 /* Render the dot file as an SVG in the given container. */
 function renderDot(dot, container, forJob) {
-var escaped_dot = dot
-.replace(/&lt;/g, "<")
-.replace(/&gt;/g, ">")
-.replace(/&quot;/g, "\"");
-var g = graphlibDot.read(escaped_dot);
+var g = graphlibDot.read(dot);
 var renderer = new dagreD3.render();
 preprocessGraphLayout(g, forJob);
 renderer(container, g);
@@ -498,18 +494,11 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
 edgesContainer.append("path").datum(points).attr("d", line);
 }

-/*
-* Replace `/n` with `<br/>`
-*/
-function replaceLineBreak(str) {
-return str.replace("\\n", "<br/>");
-}
-
 /* (Job page only) Helper function to add tooltips for RDDs. */
 function addTooltipsForRDDs(svgContainer) {
 svgContainer.selectAll("g.node").each(function() {
 var node = d3.select(this);
-var tooltipText = replaceLineBreak(node.attr("name"));
+var tooltipText = node.attr("name");
 if (tooltipText) {
 node.select("circle")
 .attr("data-toggle", "tooltip")

core/src/main/scala/org/apache/spark/BarrierTaskContext.scala

Lines changed: 14 additions & 16 deletions
@@ -20,9 +20,9 @@ package org.apache.spark
 import java.util.{Properties, Timer, TimerTask}

 import scala.collection.JavaConverters._
-import scala.concurrent.TimeoutException
 import scala.concurrent.duration._
 import scala.language.postfixOps
+import scala.util.{Failure, Success => ScalaSuccess, Try}

 import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.executor.TaskMetrics
@@ -85,28 +85,26 @@ class BarrierTaskContext private[spark] (
 // BarrierCoordinator on timeout, instead of RPCTimeoutException from the RPC framework.
 timeout = new RpcTimeout(365.days, "barrierTimeout"))

-// messages which consist of all barrier tasks' messages
-var messages: Array[String] = null
 // Wait the RPC future to be completed, but every 1 second it will jump out waiting
 // and check whether current spark task is killed. If killed, then throw
 // a `TaskKilledException`, otherwise continue wait RPC until it completes.
-try {
-while (!abortableRpcFuture.toFuture.isCompleted) {
+
+while (!abortableRpcFuture.future.isCompleted) {
+try {
 // wait RPC future for at most 1 second
-try {
-messages = ThreadUtils.awaitResult(abortableRpcFuture.toFuture, 1.second)
-} catch {
-case _: TimeoutException | _: InterruptedException =>
-// If `TimeoutException` thrown, waiting RPC future reach 1 second.
-// If `InterruptedException` thrown, it is possible this task is killed.
-// So in this two cases, we should check whether task is killed and then
-// throw `TaskKilledException`
-taskContext.killTaskIfInterrupted()
+Thread.sleep(1000)
+} catch {
+case _: InterruptedException => // task is killed by driver
+} finally {
+Try(taskContext.killTaskIfInterrupted()) match {
+case ScalaSuccess(_) => // task is still running healthily
+case Failure(e) => abortableRpcFuture.abort(e)
 }
 }
-} finally {
-abortableRpcFuture.abort(taskContext.getKillReason().getOrElse("Unknown reason."))
 }
+// messages which consist of all barrier tasks' messages. The future will return the
+// desired messages if it is completed successfully. Otherwise, exception could be thrown.
+val messages = abortableRpcFuture.future.value.get.get

 barrierEpoch += 1
 logInfo(s"Task $taskAttemptId from Stage $stageId(Attempt $stageAttemptNumber) finished " +

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 3 additions & 1 deletion
@@ -2740,7 +2740,7 @@ object SparkContext extends Logging {
 case "local" => 1
 case SparkMasterRegex.LOCAL_N_REGEX(threads) => convertToInt(threads)
 case SparkMasterRegex.LOCAL_N_FAILURES_REGEX(threads, _) => convertToInt(threads)
-case "yarn" =>
+case "yarn" | SparkMasterRegex.KUBERNETES_REGEX(_) =>
 if (conf != null && conf.get(SUBMIT_DEPLOY_MODE) == "cluster") {
 conf.getInt(DRIVER_CORES.key, 0)
 } else {
@@ -2885,6 +2885,8 @@ private object SparkMasterRegex {
 val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r
 // Regular expression for connecting to Spark deploy clusters
 val SPARK_REGEX = """spark://(.*)""".r
+// Regular expression for connecting to kubernetes clusters
+val KUBERNETES_REGEX = """k8s://(.*)""".r
 }

 /**
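
Note: with the new KUBERNETES_REGEX, a k8s://... master URL takes the same branch as "yarn" when computing driver cores. A small, self-contained Scala sketch of how that regex extractor drives the match; this is a simplified stand-in for SparkContext.numDriverCores, not the real method:

// Same pattern as the diff above, reduced to a standalone example.
object SparkMasterRegexSketch {
  // Regular expression for connecting to kubernetes clusters (as added in the diff).
  val KUBERNETES_REGEX = """k8s://(.*)""".r

  // Simplified: in cluster deploy mode the driver runs remotely, so its core
  // count comes from configuration; in client mode this sketch reports 0.
  def driverCores(master: String, deployMode: String, configuredDriverCores: Int): Int =
    master match {
      case "yarn" | KUBERNETES_REGEX(_) =>
        if (deployMode == "cluster") configuredDriverCores else 0
      case _ => 0
    }

  def main(args: Array[String]): Unit = {
    // "k8s://https://host:6443" matches KUBERNETES_REGEX, so cluster mode
    // picks up the configured driver cores; client mode does not.
    assert(driverCores("k8s://https://host:6443", "cluster", 4) == 4)
    assert(driverCores("k8s://https://host:6443", "client", 4) == 0)
    assert(driverCores("spark://host:7077", "cluster", 4) == 0)
  }
}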
