
Commit 22ba64e

Merge branch 'master' into SPARK-11207
2 parents: f85bca6 + 5cdea7d

52 files changed: +719 −298 lines


LICENSE

Lines changed: 1 addition & 1 deletion
@@ -265,7 +265,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.2.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.9 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/pkg/R/functions.R

Lines changed: 9 additions & 4 deletions
@@ -18,16 +18,21 @@
 #' @include generics.R column.R
 NULL

-#' Creates a \code{Column} of literal value.
+#' lit
 #'
-#' The passed in object is returned directly if it is already a \linkS4class{Column}.
-#' If the object is a Scala Symbol, it is converted into a \linkS4class{Column} also.
-#' Otherwise, a new \linkS4class{Column} is created to represent the literal value.
+#' A new \linkS4class{Column} is created to represent the literal value.
+#' If the parameter is a \linkS4class{Column}, it is returned unchanged.
 #'
 #' @family normal_funcs
 #' @rdname lit
 #' @name lit
 #' @export
+#' @examples
+#' \dontrun{
+#' lit(df$name)
+#' select(df, lit("x"))
+#' select(df, lit("2015-01-01"))
+#'}
 setMethod("lit", signature("ANY"),
           function(x) {
             jc <- callJStatic("org.apache.spark.sql.functions",
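
The R examples added above have a direct Scala counterpart in org.apache.spark.sql.functions.lit. A minimal sketch, assuming a hypothetical DataFrame `df` with a "name" column; per the new docstring, a Column passed to lit comes back unchanged, while other values become literal Columns.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit

object LitSketch {
  // lit(df("name")) passes the existing Column through; the string
  // arguments become literal Columns.
  def withConstants(df: DataFrame): DataFrame =
    df.select(lit(df("name")), lit("x"), lit("2015-01-01"))
}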

bin/pyspark

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ export PYSPARK_PYTHON

 # Add the PySpark classes to the Python path:
 export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
+export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"

 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

bin/pyspark2.cmd

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )

 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.8.2.1-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.9-src.zip;%PYTHONPATH%

 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.8.2.1</version>
+      <version>0.9</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator)
+      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.9-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
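
The bash, cmd, and Scala changes above must all pin the same archive name, which is why the Py4J bump touches every one of them. A minimal, self-contained sketch of the path assembly in this hunk; the object name and the sample output are illustrative only.

import java.io.File

object PythonPathSketch {
  def main(args: Array[String]): Unit = {
    // As above: each entry's segments are joined with the file separator,
    // then the entries are joined with the platform's path separator.
    val entries = sys.env.get("SPARK_HOME").toSeq.flatMap { sparkHome =>
      Seq(
        Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator),
        Seq(sparkHome, "python", "lib", "py4j-0.9-src.zip").mkString(File.separator))
    }
    // With SPARK_HOME=/opt/spark on Linux this would print:
    // /opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-0.9-src.zip
    println(entries.mkString(File.pathSeparator))
  }
}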

docs/running-on-yarn.md

Lines changed: 4 additions & 1 deletion
@@ -81,14 +81,17 @@ all environment variables used for launching each container. This process is use
 classpath problems in particular. (Note that enabling this requires admin privileges on cluster
 settings and a restart of all node managers. Thus, this is not applicable to hosted clusters).

-To use a custom log4j configuration for the application master or executors, there are two options:
+To use a custom log4j configuration for the application master or executors, here are the options:

 - upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files
   to be uploaded with the application.
 - add `-Dlog4j.configuration=<location of configuration file>` to `spark.driver.extraJavaOptions`
   (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file,
   the `file:` protocol should be explicitly provided, and the file needs to exist locally on all
   the nodes.
+- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along
+  with the other configurations. Note that other 2 options has higher priority than this option if
+  multiple options are specified.

 Note that for the first option, both executors and the application master will share the same
 log4j configuration, which may cause issues when they run on the same node (e.g. trying to write
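
For the second option above, a minimal Scala sketch of how the two settings might be wired up programmatically; the log4j.properties path is hypothetical, and only the spark.driver.extraJavaOptions and spark.executor.extraJavaOptions keys are taken from the doc change.

import org.apache.spark.SparkConf

object Log4jConfSketch {
  // Hypothetical path; with the file: protocol the file must exist
  // locally on every node, as the note above says.
  private val logOpt = "-Dlog4j.configuration=file:/etc/spark/log4j.properties"

  def withCustomLog4j(conf: SparkConf): SparkConf =
    conf
      .set("spark.driver.extraJavaOptions", logOpt)
      .set("spark.executor.extraJavaOptions", logOpt)
}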

mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala

Lines changed: 13 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -30,44 +30,53 @@ import org.apache.spark.sql.types.DoubleType
  * :: Experimental ::
  * Evaluator for binary classification, which expects two input columns: rawPrediction and label.
  */
+@Since("1.2.0")
 @Experimental
-class BinaryClassificationEvaluator(override val uid: String)
+class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Evaluator with HasRawPredictionCol with HasLabelCol {

+  @Since("1.2.0")
   def this() = this(Identifiable.randomUID("binEval"))

   /**
    * param for metric name in evaluation
    * Default: areaUnderROC
    * @group param
    */
+  @Since("1.2.0")
   val metricName: Param[String] = {
     val allowedParams = ParamValidators.inArray(Array("areaUnderROC", "areaUnderPR"))
     new Param(
       this, "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)", allowedParams)
   }

   /** @group getParam */
+  @Since("1.2.0")
   def getMetricName: String = $(metricName)

   /** @group setParam */
+  @Since("1.2.0")
   def setMetricName(value: String): this.type = set(metricName, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value)

   /**
    * @group setParam
    * @deprecated use [[setRawPredictionCol()]] instead
    */
   @deprecated("use setRawPredictionCol instead", "1.5.0")
+  @Since("1.2.0")
   def setScoreCol(value: String): this.type = set(rawPredictionCol, value)

   /** @group setParam */
+  @Since("1.2.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   setDefault(metricName -> "areaUnderROC")

+  @Since("1.2.0")
   override def evaluate(dataset: DataFrame): Double = {
     val schema = dataset.schema
     SchemaUtils.checkColumnType(schema, $(rawPredictionCol), new VectorUDT)
@@ -87,10 +96,12 @@ class BinaryClassificationEvaluator(override val uid: String)
     metric
   }

+  @Since("1.5.0")
   override def isLargerBetter: Boolean = $(metricName) match {
     case "areaUnderROC" => true
     case "areaUnderPR" => true
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra)
 }
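
A hedged usage sketch for the evaluator above, not taken from the commit: `predictions` stands for any DataFrame carrying "rawPrediction" and "label" columns, and the setters and metric names come straight from the diff.

import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.sql.DataFrame

object BinaryEvalSketch {
  // Area under the precision-recall curve; the default metric is
  // "areaUnderROC", per the setDefault call above.
  def areaUnderPR(predictions: DataFrame): Double =
    new BinaryClassificationEvaluator()
      .setMetricName("areaUnderPR")
      .setRawPredictionCol("rawPrediction")
      .setLabelCol("label")
      .evaluate(predictions)
}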

mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala

Lines changed: 6 additions & 1 deletion
@@ -17,14 +17,15 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.ml.param.{ParamMap, Params}
 import org.apache.spark.sql.DataFrame

 /**
  * :: DeveloperApi ::
  * Abstract class for evaluators that compute metrics from predictions.
  */
+@Since("1.5.0")
 @DeveloperApi
 abstract class Evaluator extends Params {

@@ -35,6 +36,7 @@ abstract class Evaluator extends Params {
    * @param paramMap parameter map that specifies the input columns and output metrics
    * @return metric
    */
+  @Since("1.5.0")
   def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {
     this.copy(paramMap).evaluate(dataset)
   }
@@ -44,14 +46,17 @@ abstract class Evaluator extends Params {
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */
+  @Since("1.5.0")
   def evaluate(dataset: DataFrame): Double

   /**
    * Indicates whether the metric returned by [[evaluate()]] should be maximized (true, default)
    * or minimized (false).
    * A given evaluator may support multiple metrics which may be maximized or minimized.
    */
+  @Since("1.5.0")
   def isLargerBetter: Boolean = true

+  @Since("1.5.0")
   override def copy(extra: ParamMap): Evaluator
 }
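
To make the contract concrete, a hedged sketch of a custom Evaluator, not part of the commit: a made-up mean-absolute-error metric where lower is better, so isLargerBetter is overridden to false. The class name and the "prediction" and "label" column names are assumptions.

import org.apache.spark.ml.evaluation.Evaluator
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.DataFrame

class MaeEvaluator(override val uid: String) extends Evaluator {

  def this() = this(Identifiable.randomUID("maeEval"))

  // Mean absolute error over the assumed prediction/label columns.
  override def evaluate(dataset: DataFrame): Double =
    dataset.selectExpr("avg(abs(prediction - label))").head().getDouble(0)

  // Smaller error is better, so the default of true is flipped.
  override def isLargerBetter: Boolean = false

  override def copy(extra: ParamMap): MaeEvaluator = defaultCopy(extra)
}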

mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala

Lines changed: 12 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.evaluation

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param.{ParamMap, ParamValidators, Param}
 import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
 import org.apache.spark.ml.util.{SchemaUtils, Identifiable}
@@ -29,17 +29,20 @@ import org.apache.spark.sql.types.DoubleType
  * :: Experimental ::
  * Evaluator for multiclass classification, which expects two input columns: score and label.
  */
+@Since("1.5.0")
 @Experimental
-class MulticlassClassificationEvaluator (override val uid: String)
+class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") override val uid: String)
   extends Evaluator with HasPredictionCol with HasLabelCol {

+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("mcEval"))

   /**
    * param for metric name in evaluation (supports `"f1"` (default), `"precision"`, `"recall"`,
    * `"weightedPrecision"`, `"weightedRecall"`)
    * @group param
    */
+  @Since("1.5.0")
   val metricName: Param[String] = {
     val allowedParams = ParamValidators.inArray(Array("f1", "precision",
       "recall", "weightedPrecision", "weightedRecall"))
@@ -48,19 +51,24 @@ class MulticlassClassificationEvaluator (override val uid: String)
   }

   /** @group getParam */
+  @Since("1.5.0")
   def getMetricName: String = $(metricName)

   /** @group setParam */
+  @Since("1.5.0")
   def setMetricName(value: String): this.type = set(metricName, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   setDefault(metricName -> "f1")

+  @Since("1.5.0")
   override def evaluate(dataset: DataFrame): Double = {
     val schema = dataset.schema
     SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType)
@@ -81,6 +89,7 @@ class MulticlassClassificationEvaluator (override val uid: String)
     metric
   }

+  @Since("1.5.0")
   override def isLargerBetter: Boolean = $(metricName) match {
     case "f1" => true
     case "precision" => true
@@ -89,5 +98,6 @@ class MulticlassClassificationEvaluator (override val uid: String)
     case "weightedRecall" => true
   }

+  @Since("1.5.0")
   override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra)
 }
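
And a matching hedged usage sketch for the multiclass evaluator; `predictions` is a hypothetical DataFrame with "prediction" and "label" columns, and "weightedPrecision" is one of the five metrics listed in the param above (the default is "f1").

import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.sql.DataFrame

object MulticlassEvalSketch {
  // Weighted precision over the assumed prediction/label columns.
  def weightedPrecision(predictions: DataFrame): Double =
    new MulticlassClassificationEvaluator()
      .setMetricName("weightedPrecision")
      .setPredictionCol("prediction")
      .setLabelCol("label")
      .evaluate(predictions)
}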
