
Commit 1d725ad

Merge remote-tracking branch 'origin/master' into modularize-jdbc-internals
2 parents: 43cbef6 + d314677


61 files changed: +1843, -606 lines


R/pkg/R/mllib.R

Lines changed: 6 additions & 4 deletions
@@ -138,10 +138,11 @@ predict_internal <- function(object, newData) {
 #' This can be a character string naming a family function, a family function or
 #' the result of a call to a family function. Refer R family at
 #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
-#' weights as 1.0.
 #' @param tol positive convergence tolerance of iterations.
 #' @param maxIter integer giving the maximal number of IRLS iterations.
+#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
+#' weights as 1.0.
+#' @param regParam regularization parameter for L2 regularization.
 #' @param ... additional arguments passed to the method.
 #' @aliases spark.glm,SparkDataFrame,formula-method
 #' @return \code{spark.glm} returns a fitted generalized linear model
@@ -171,7 +172,8 @@ predict_internal <- function(object, newData) {
 #' @note spark.glm since 2.0.0
 #' @seealso \link{glm}, \link{read.ml}
 setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
-          function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL) {
+          function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL,
+                   regParam = 0.0) {
             if (is.character(family)) {
               family <- get(family, mode = "function", envir = parent.frame())
             }
@@ -190,7 +192,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),

             jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper",
                                 "fit", formula, data@sdf, family$family, family$link,
-                                tol, as.integer(maxIter), as.character(weightCol))
+                                tol, as.integer(maxIter), as.character(weightCol), regParam)
             new("GeneralizedLinearRegressionModel", jobj = jobj)
           })

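For readers new to the argument: regParam feeds an L2 (ridge) penalty into the GLM fit. Schematically, and glossing over Spark's exact scaling and standardization conventions, the fitted coefficients solve

    \hat{\beta} = \operatorname{arg\,min}_{\beta} \Big[ -\ell(\beta \mid X, y) + \lambda \lVert \beta \rVert_2^2 \Big], \qquad \lambda = \text{regParam},

so the new default regParam = 0.0 leaves the unpenalized fit unchanged.
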
R/pkg/R/window.R

Lines changed: 6 additions & 6 deletions
@@ -21,9 +21,9 @@
 #'
 #' Creates a WindowSpec with the partitioning defined.
 #'
-#' @param col A column name or Column by which rows are partitioned to
+#' @param col A column name or Column by which rows are partitioned to
 #' windows.
-#' @param ... Optional column names or Columns in addition to col, by
+#' @param ... Optional column names or Columns in addition to col, by
 #' which rows are partitioned to windows.
 #'
 #' @rdname windowPartitionBy
@@ -32,10 +32,10 @@
 #' @export
 #' @examples
 #' \dontrun{
-#'   ws <- windowPartitionBy("key1", "key2")
+#'   ws <- orderBy(windowPartitionBy("key1", "key2"), "key3")
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #'
-#'   ws <- windowPartitionBy(df$key1, df$key2)
+#'   ws <- orderBy(windowPartitionBy(df$key1, df$key2), df$key3)
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #' }
 #' @note windowPartitionBy(character) since 2.0.0
@@ -70,9 +70,9 @@ setMethod("windowPartitionBy",
 #'
 #' Creates a WindowSpec with the ordering defined.
 #'
-#' @param col A column name or Column by which rows are ordered within
+#' @param col A column name or Column by which rows are ordered within
 #' windows.
-#' @param ... Optional column names or Columns in addition to col, by
+#' @param ... Optional column names or Columns in addition to col, by
 #' which rows are ordered within windows.
 #'
 #' @rdname windowOrderBy

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 6 additions & 0 deletions
@@ -148,6 +148,12 @@ test_that("spark.glm summary", {
   baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data = iris)
   baseSummary <- summary(baseModel)
   expect_true(abs(baseSummary$deviance - 12.19313) < 1e-4)
+
+  # Test spark.glm works with regularization parameter
+  data <- as.data.frame(cbind(a1, a2, b))
+  df <- suppressWarnings(createDataFrame(data))
+  regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0))
+  expect_equal(regStats$aic, 13.32836, tolerance = 1e-4) # 13.32836 is from summary() result
 })

 test_that("spark.glm save/load", {

common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 4 additions & 4 deletions
@@ -470,7 +470,7 @@ public UTF8String trim() {
     while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--;
     if (s > e) {
       // empty string
-      return UTF8String.fromBytes(new byte[0]);
+      return EMPTY_UTF8;
     } else {
       return copyUTF8String(s, e);
     }
@@ -482,7 +482,7 @@ public UTF8String trimLeft() {
     while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++;
     if (s == this.numBytes) {
       // empty string
-      return UTF8String.fromBytes(new byte[0]);
+      return EMPTY_UTF8;
     } else {
       return copyUTF8String(s, this.numBytes - 1);
     }
@@ -495,7 +495,7 @@ public UTF8String trimRight() {

     if (e < 0) {
       // empty string
-      return UTF8String.fromBytes(new byte[0]);
+      return EMPTY_UTF8;
     } else {
       return copyUTF8String(0, e);
     }
@@ -761,7 +761,7 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {

     if (numInputs == 0) {
       // Return an empty string if there is no input, or all the inputs are null.
-      return fromBytes(new byte[0]);
+      return EMPTY_UTF8;
     }

     // Allocate a new byte array, and copy the inputs one by one into it.

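All four hunks apply the same micro-optimization: return one shared, immutable empty value instead of allocating a fresh zero-length byte array (and a wrapper around it) on every call. A minimal Scala sketch of the pattern, using a hypothetical Text type rather than Spark's UTF8String:

import java.util.Arrays

final class Text(val bytes: Array[Byte]) {
  // Trim leading control/space bytes, mirroring the shape of UTF8String.trimLeft.
  def trimLeft: Text = {
    var s = 0
    while (s < bytes.length && bytes(s) <= 0x20 && bytes(s) >= 0x00) s += 1
    if (s == bytes.length) Text.Empty                        // shared instance, no allocation
    else new Text(Arrays.copyOfRange(bytes, s, bytes.length))
  }
}

object Text {
  // Allocated once; safe to share only because instances are never mutated in place.
  val Empty: Text = new Text(Array.empty[Byte])
}
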
core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 1 addition & 1 deletion
@@ -230,7 +230,7 @@ private[spark] class ExecutorAllocationManager(
         }
       }
     }
-    executor.scheduleAtFixedRate(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS)
+    executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS)

     client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount)
   }

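The one-word change matters because the two calls schedule differently: scheduleAtFixedRate measures the interval from the start of each run, so a run that overruns the period is followed immediately by the next one, while scheduleWithFixedDelay always waits the full interval after the previous run finishes. A standalone sketch with plain java.util.concurrent (nothing Spark-specific; the interval values are arbitrary):

import java.util.concurrent.{Executors, TimeUnit}

object SchedulingSketch {
  def main(args: Array[String]): Unit = {
    val executor = Executors.newSingleThreadScheduledExecutor()
    val task = new Runnable {
      override def run(): Unit = {
        println(s"tick at ${System.currentTimeMillis() % 100000} ms")
        Thread.sleep(150)  // simulate a run that overruns the 100 ms interval
      }
    }
    // With scheduleAtFixedRate the next tick would fire as soon as the slow run
    // returns; with scheduleWithFixedDelay there is always a 100 ms gap after it.
    executor.scheduleWithFixedDelay(task, 0, 100, TimeUnit.MILLISECONDS)
    Thread.sleep(1000)
    executor.shutdownNow()
  }
}
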
core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 2 additions & 2 deletions
@@ -383,7 +383,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,

   /** Register multiple map output information for the given shuffle */
   def registerMapOutputs(shuffleId: Int, statuses: Array[MapStatus], changeEpoch: Boolean = false) {
-    mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses)
+    mapStatuses.put(shuffleId, statuses.clone())
     if (changeEpoch) {
       incrementEpoch()
     }
@@ -535,7 +535,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
           true
         case None =>
           logDebug("cached status not found for : " + shuffleId)
-          statuses = mapStatuses.getOrElse(shuffleId, Array[MapStatus]())
+          statuses = mapStatuses.getOrElse(shuffleId, Array.empty[MapStatus])
           epochGotten = epoch
           false
       }

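Both hunks are small allocation and idiom cleanups: statuses.clone() yields the same defensive copy as Array[MapStatus]() ++ statuses but via a single array copy rather than a collection builder, and Array.empty[MapStatus] is the conventional spelling of an empty array. A quick REPL-style Scala sketch, with String standing in for MapStatus:

val statuses = Array("s1", "s2", "s3")          // stand-in for Array[MapStatus]

val viaConcat = Array[String]() ++ statuses     // old style: builder-based copy
val viaClone  = statuses.clone()                // new style: one straight array copy

assert(viaConcat.sameElements(viaClone))        // same contents either way
assert(!(viaClone eq statuses))                 // still a copy, not an alias

val empty = Array.empty[String]                 // preferred over Array[String]()
assert(empty.isEmpty)
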
core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala

Lines changed: 14 additions & 24 deletions
@@ -21,6 +21,8 @@ import java.util.concurrent._
 import java.util.concurrent.{Future => JFuture, ScheduledFuture => JScheduledFuture}
 import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference}

+import scala.concurrent.Future
+import scala.util.{Failure, Success}
 import scala.util.control.NonFatal

 import org.apache.spark.SparkConf
@@ -79,11 +81,6 @@ private[spark] class StandaloneAppClient(
   private val registrationRetryThread =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("appclient-registration-retry-thread")

-  // A thread pool to perform receive then reply actions in a thread so as not to block the
-  // event loop.
-  private val askAndReplyThreadPool =
-    ThreadUtils.newDaemonCachedThreadPool("appclient-receive-and-reply-threadpool")
-
   override def onStart(): Unit = {
     try {
       registerWithMaster(1)
@@ -220,19 +217,13 @@
       endpointRef: RpcEndpointRef,
       context: RpcCallContext,
       msg: T): Unit = {
-    // Create a thread to ask a message and reply with the result. Allow thread to be
+    // Ask a message and create a thread to reply with the result. Allow thread to be
     // interrupted during shutdown, otherwise context must be notified of NonFatal errors.
-    askAndReplyThreadPool.execute(new Runnable {
-      override def run(): Unit = {
-        try {
-          context.reply(endpointRef.askWithRetry[Boolean](msg))
-        } catch {
-          case ie: InterruptedException => // Cancelled
-          case NonFatal(t) =>
-            context.sendFailure(t)
-        }
-      }
-    })
+    endpointRef.ask[Boolean](msg).andThen {
+      case Success(b) => context.reply(b)
+      case Failure(ie: InterruptedException) => // Cancelled
+      case Failure(NonFatal(t)) => context.sendFailure(t)
+    }(ThreadUtils.sameThread)
   }

   override def onDisconnected(address: RpcAddress): Unit = {
@@ -272,7 +263,6 @@ private[spark] class StandaloneAppClient(
     registrationRetryThread.shutdownNow()
     registerMasterFutures.get.foreach(_.cancel(true))
     registerMasterThreadPool.shutdownNow()
-    askAndReplyThreadPool.shutdownNow()
   }

 }
@@ -301,25 +291,25 @@ private[spark] class StandaloneAppClient(
   *
   * @return whether the request is acknowledged.
   */
-  def requestTotalExecutors(requestedTotal: Int): Boolean = {
+  def requestTotalExecutors(requestedTotal: Int): Future[Boolean] = {
     if (endpoint.get != null && appId.get != null) {
-      endpoint.get.askWithRetry[Boolean](RequestExecutors(appId.get, requestedTotal))
+      endpoint.get.ask[Boolean](RequestExecutors(appId.get, requestedTotal))
     } else {
       logWarning("Attempted to request executors before driver fully initialized.")
-      false
+      Future.successful(false)
     }
   }

   /**
   * Kill the given list of executors through the Master.
   * @return whether the kill request is acknowledged.
   */
-  def killExecutors(executorIds: Seq[String]): Boolean = {
+  def killExecutors(executorIds: Seq[String]): Future[Boolean] = {
     if (endpoint.get != null && appId.get != null) {
-      endpoint.get.askWithRetry[Boolean](KillExecutors(appId.get, executorIds))
+      endpoint.get.ask[Boolean](KillExecutors(appId.get, executorIds))
     } else {
       logWarning("Attempted to kill executors before driver fully initialized.")
-      false
+      Future.successful(false)
     }
   }

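The substantive change in this file is the removal of the dedicated "ask and reply" thread pool: instead of parking a pooled thread on a blocking askWithRetry, the client now calls the non-blocking ask, which returns a Future, and completes the RPC context from a callback. As a consequence, requestTotalExecutors and killExecutors return Future[Boolean] rather than Boolean, so callers receive the acknowledgement asynchronously. A minimal sketch of the callback pattern using plain scala.concurrent types; ReplyContext and askAndReply below are stand-ins, not Spark's RpcCallContext API:

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}
import scala.util.control.NonFatal

object AskAndReplySketch {
  // Stand-in for the reply channel of an RPC call.
  trait ReplyContext {
    def reply(result: Boolean): Unit
    def sendFailure(t: Throwable): Unit
  }

  // Old shape: submit a Runnable to a cached pool and block it on the answer.
  // New shape: attach a callback to the Future so no pool thread sits waiting.
  def askAndReply(ask: () => Future[Boolean], context: ReplyContext)
                 (implicit ec: ExecutionContext): Unit = {
    ask().andThen {
      case Success(acknowledged)            => context.reply(acknowledged)
      case Failure(_: InterruptedException) => // cancelled during shutdown
      case Failure(NonFatal(t))             => context.sendFailure(t)
    }
  }
}
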
core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ class ZippedWithIndexRDD[T: ClassTag](prev: RDD[T]) extends RDD[(T, Long)](prev)
   @transient private val startIndices: Array[Long] = {
     val n = prev.partitions.length
     if (n == 0) {
-      Array[Long]()
+      Array.empty
     } else if (n == 1) {
       Array(0L)
     } else {

0 commit comments
