
Commit 22ba64e

Merge branch 'master' into SPARK-11207
2 parents: f85bca6 + 5cdea7d

52 files changed: +719 −298 lines


LICENSE

Lines changed: 1 addition & 1 deletion
@@ -265,7 +265,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.2.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.9 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/pkg/R/functions.R

Lines changed: 9 additions & 4 deletions
@@ -18,16 +18,21 @@
 #' @include generics.R column.R
 NULL

-#' Creates a \code{Column} of literal value.
+#' lit
 #'
-#' The passed in object is returned directly if it is already a \linkS4class{Column}.
-#' If the object is a Scala Symbol, it is converted into a \linkS4class{Column} also.
-#' Otherwise, a new \linkS4class{Column} is created to represent the literal value.
+#' A new \linkS4class{Column} is created to represent the literal value.
+#' If the parameter is a \linkS4class{Column}, it is returned unchanged.
 #'
 #' @family normal_funcs
 #' @rdname lit
 #' @name lit
 #' @export
+#' @examples
+#' \dontrun{
+#' lit(df$name)
+#' select(df, lit("x"))
+#' select(df, lit("2015-01-01"))
+#'}
 setMethod("lit", signature("ANY"),
           function(x) {
             jc <- callJStatic("org.apache.spark.sql.functions",
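
The R examples added above have a direct Scala counterpart in org.apache.spark.sql.functions.lit. A minimal sketch, assuming a hypothetical DataFrame `df` with a "name" column; per the new docstring, a Column passed to lit comes back unchanged, while other values become literal Columns.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit

object LitSketch {
  // lit(df("name")) passes the existing Column through; the string
  // arguments become literal Columns.
  def withConstants(df: DataFrame): DataFrame =
    df.select(lit(df("name")), lit("x"), lit("2015-01-01"))
}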

bin/pyspark

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ export PYSPARK_PYTHON

 # Add the PySpark classes to the Python path:
 export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
+export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"

 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

bin/pyspark2.cmd

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )

 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.8.2.1-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.9-src.zip;%PYTHONPATH%

 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.8.2.1</version>
+      <version>0.9</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator)
+      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.9-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
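
The bash, cmd, and Scala changes above must all pin the same archive name, which is why the Py4J bump touches every one of them. A minimal, self-contained sketch of the path assembly in this hunk; the object name and the sample output are illustrative only.

import java.io.File

object PythonPathSketch {
  def main(args: Array[String]): Unit = {
    // As above: each entry's segments are joined with the file separator,
    // then the entries are joined with the platform's path separator.
    val entries = sys.env.get("SPARK_HOME").toSeq.flatMap { sparkHome =>
      Seq(
        Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator),
        Seq(sparkHome, "python", "lib", "py4j-0.9-src.zip").mkString(File.separator))
    }
    // With SPARK_HOME=/opt/spark on Linux this would print:
    // /opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-0.9-src.zip
    println(entries.mkString(File.pathSeparator))
  }
}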

docs/running-on-yarn.md

Lines changed: 4 additions & 1 deletion
@@ -81,14 +81,17 @@ all environment variables used for launching each container. This process is use
 classpath problems in particular. (Note that enabling this requires admin privileges on cluster
 settings and a restart of all node managers. Thus, this is not applicable to hosted clusters).

-To use a custom log4j configuration for the application master or executors, there are two options:
+To use a custom log4j configuration for the application master or executors, here are the options:

 - upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files
   to be uploaded with the application.
 - add `-Dlog4j.configuration=<location of configuration file>` to `spark.driver.extraJavaOptions`
   (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file,
   the `file:` protocol should be explicitly provided, and the file needs to exist locally on all
   the nodes.
+- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along
+  with the other configurations. Note that other 2 options has higher priority than this option if
+  multiple options are specified.

 Note that for the first option, both executors and the application master will share the same
 log4j configuration, which may cause issues when they run on the same node (e.g. trying to write
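
For the second option above, a minimal Scala sketch of how the two settings might be wired up programmatically; the log4j.properties path is hypothetical, and only the spark.driver.extraJavaOptions and spark.executor.extraJavaOptions keys are taken from the doc change.

import org.apache.spark.SparkConf

object Log4jConfSketch {
  // Hypothetical path; with the file: protocol the file must exist
  // locally on every node, as the note above says.
  private val logOpt = "-Dlog4j.configuration=file:/etc/spark/log4j.properties"

  def withCustomLog4j(conf: SparkConf): SparkConf =
    conf
      .set("spark.driver.extraJavaOptions", logOpt)
      .set("spark.executor.extraJavaOptions", logOpt)
}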

mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala

Lines changed: 13 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -30,44 +30,53 @@ import org.apache.spark.sql.types.DoubleType
  * :: Experimental ::
  * Evaluator for binary classification, which expects two input columns: rawPrediction and label.
  */
+@Since("1.2.0")
 @Experimental
-class BinaryClassificationEvaluator(override val uid: String)
+class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Evaluator with HasRawPredictionCol with HasLabelCol {

+  @Since("1.2.0")
   def this() = this(Identifiable.randomUID("binEval"))

   /**
    * param for metric name in evaluation
    * Default: areaUnderROC
    * @group param
    */
+  @Since("1.2.0")
   val metricName: Param[String] = {
     val allowedParams = ParamValidators.inArray(Array("areaUnderROC", "areaUnderPR"))
     new Param(
       this, "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)", allowedParams)
   }

   /** @group getParam */
+  @Since("1.2.0")
   def getMetricName: String = $(metricName)

   /** @group setParam */
+  @Since("1.2.0")
   def setMetricName(value: String): this.type = set(metricName, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value)

   /**
    * @group setParam
    * @deprecated use [[setRawPredictionCol()]] instead
    */
   @deprecated("use setRawPredictionCol instead", "1.5.0")
+  @Since("1.2.0")
   def setScoreCol(value: String): this.type = set(rawPredictionCol, value)

   /** @group setParam */
+  @Since("1.2.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   setDefault(metricName -> "areaUnderROC")

+  @Since("1.2.0")
   override def evaluate(dataset: DataFrame): Double = {
     val schema = dataset.schema
     SchemaUtils.checkColumnType(schema, $(rawPredictionCol), new VectorUDT)
@@ -87,10 +96,12 @@ class BinaryClassificationEvaluator(override val uid: String)
     metric
   }

+  @Since("1.5.0")
   override def isLargerBetter: Boolean = $(metricName) match {
     case "areaUnderROC" => true
     case "areaUnderPR" => true
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra)
 }
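
A hedged usage sketch for the evaluator above, not taken from the commit: `predictions` stands for any DataFrame carrying "rawPrediction" and "label" columns, and the setters and metric names come straight from the diff.

import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.sql.DataFrame

object BinaryEvalSketch {
  // Area under the precision-recall curve; the default metric is
  // "areaUnderROC", per the setDefault call above.
  def areaUnderPR(predictions: DataFrame): Double =
    new BinaryClassificationEvaluator()
      .setMetricName("areaUnderPR")
      .setRawPredictionCol("rawPrediction")
      .setLabelCol("label")
      .evaluate(predictions)
}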

mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala

Lines changed: 6 additions & 1 deletion
@@ -17,14 +17,15 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.ml.param.{ParamMap, Params}
 import org.apache.spark.sql.DataFrame

 /**
  * :: DeveloperApi ::
  * Abstract class for evaluators that compute metrics from predictions.
  */
+@Since("1.5.0")
 @DeveloperApi
 abstract class Evaluator extends Params {

@@ -35,6 +36,7 @@ abstract class Evaluator extends Params {
    * @param paramMap parameter map that specifies the input columns and output metrics
    * @return metric
    */
+  @Since("1.5.0")
   def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {
     this.copy(paramMap).evaluate(dataset)
   }
@@ -44,14 +46,17 @@ abstract class Evaluator extends Params {
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */
+  @Since("1.5.0")
   def evaluate(dataset: DataFrame): Double

   /**
    * Indicates whether the metric returned by [[evaluate()]] should be maximized (true, default)
    * or minimized (false).
    * A given evaluator may support multiple metrics which may be maximized or minimized.
    */
+  @Since("1.5.0")
   def isLargerBetter: Boolean = true

+  @Since("1.5.0")
   override def copy(extra: ParamMap): Evaluator
 }
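
To make the contract concrete, a hedged sketch of a custom Evaluator, not part of the commit: a made-up mean-absolute-error metric where lower is better, so isLargerBetter is overridden to false. The class name and the "prediction" and "label" column names are assumptions.

import org.apache.spark.ml.evaluation.Evaluator
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.DataFrame

class MaeEvaluator(override val uid: String) extends Evaluator {

  def this() = this(Identifiable.randomUID("maeEval"))

  // Mean absolute error over the assumed prediction/label columns.
  override def evaluate(dataset: DataFrame): Double =
    dataset.selectExpr("avg(abs(prediction - label))").head().getDouble(0)

  // Smaller error is better, so the default of true is flipped.
  override def isLargerBetter: Boolean = false

  override def copy(extra: ParamMap): MaeEvaluator = defaultCopy(extra)
}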

mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala

Lines changed: 12 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param.{ParamMap, ParamValidators, Param}
 import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
 import org.apache.spark.ml.util.{SchemaUtils, Identifiable}
@@ -29,17 +29,20 @@ import org.apache.spark.sql.types.DoubleType
  * :: Experimental ::
  * Evaluator for multiclass classification, which expects two input columns: score and label.
  */
+@Since("1.5.0")
 @Experimental
-class MulticlassClassificationEvaluator (override val uid: String)
+class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") override val uid: String)
   extends Evaluator with HasPredictionCol with HasLabelCol {

+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("mcEval"))

   /**
    * param for metric name in evaluation (supports `"f1"` (default), `"precision"`, `"recall"`,
    * `"weightedPrecision"`, `"weightedRecall"`)
    * @group param
    */
+  @Since("1.5.0")
   val metricName: Param[String] = {
     val allowedParams = ParamValidators.inArray(Array("f1", "precision",
       "recall", "weightedPrecision", "weightedRecall"))
@@ -48,19 +51,24 @@ class MulticlassClassificationEvaluator (override val uid: String)
   }

   /** @group getParam */
+  @Since("1.5.0")
   def getMetricName: String = $(metricName)

   /** @group setParam */
+  @Since("1.5.0")
   def setMetricName(value: String): this.type = set(metricName, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   setDefault(metricName -> "f1")

+  @Since("1.5.0")
   override def evaluate(dataset: DataFrame): Double = {
     val schema = dataset.schema
     SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType)
@@ -81,6 +89,7 @@ class MulticlassClassificationEvaluator (override val uid: String)
     metric
   }

+  @Since("1.5.0")
   override def isLargerBetter: Boolean = $(metricName) match {
     case "f1" => true
     case "precision" => true
@@ -89,5 +98,6 @@ class MulticlassClassificationEvaluator (override val uid: String)
     case "weightedRecall" => true
   }

+  @Since("1.5.0")
   override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra)
 }
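
And a matching hedged usage sketch for the multiclass evaluator; `predictions` is a hypothetical DataFrame with "prediction" and "label" columns, and "weightedPrecision" is one of the five metrics listed in the param above (the default is "f1").

import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.sql.DataFrame

object MulticlassEvalSketch {
  // Weighted precision over the assumed prediction/label columns.
  def weightedPrecision(predictions: DataFrame): Double =
    new MulticlassClassificationEvaluator()
      .setMetricName("weightedPrecision")
      .setPredictionCol("prediction")
      .setLabelCol("label")
      .evaluate(predictions)
}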
