3 changes: 3 additions & 0 deletions R/pkg/NAMESPACE
@@ -194,6 +194,7 @@ exportMethods("%<=>%",
"acos",
"add_months",
"alias",
"approx_count_distinct",
"approxCountDistinct",
"approxQuantile",
"array_contains",
@@ -252,6 +253,7 @@ exportMethods("%<=>%",
"dayofweek",
"dayofyear",
"decode",
"degrees",
"dense_rank",
"desc",
"element_at",
@@ -334,6 +336,7 @@ exportMethods("%<=>%",
"posexplode",
"posexplode_outer",
"quarter",
"radians",
"rand",
"randn",
"rank",
73 changes: 64 additions & 9 deletions R/pkg/R/functions.R
@@ -112,7 +112,7 @@ NULL
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
#' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp),
#' v3 = bround(df$wt, 1), v4 = bin(df$cyl),
#' v5 = hex(df$wt), v6 = toDegrees(df$gear),
#' v5 = hex(df$wt), v6 = degrees(df$gear),
#' v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am),
#' v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1),
#' v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5),
@@ -320,23 +320,37 @@ setMethod("acos",
})

#' @details
#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group.
#'
#' @rdname column_aggregate_functions
#' @aliases approxCountDistinct approxCountDistinct,Column-method
#' @aliases approx_count_distinct approx_count_distinct,Column-method
#' @examples
#'
#' \dontrun{
#' head(select(df, approxCountDistinct(df$gear)))
#' head(select(df, approxCountDistinct(df$gear, 0.02)))
#' head(select(df, approx_count_distinct(df$gear)))
#' head(select(df, approx_count_distinct(df$gear, 0.02)))
#' head(select(df, countDistinct(df$gear, df$cyl)))
#' head(select(df, n_distinct(df$gear)))
#' head(distinct(select(df, "gear")))}
Member: we only need one set - they both are @rdname column_aggregate_functions so will duplicate all other examples

Member Author: Thanks, @HyukjinKwon fixed this. Pending tests, does the change look OK to you on the R side, @felixcheung?

#' @note approx_count_distinct(Column) since 3.0.0
setMethod("approx_count_distinct",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
column(jc)
})

#' @details
#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
#'
#' @rdname column_aggregate_functions
#' @aliases approxCountDistinct approxCountDistinct,Column-method
#' @note approxCountDistinct(Column) since 1.4.0
setMethod("approxCountDistinct",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc)
.Deprecated("approx_count_distinct")
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
column(jc)
})
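
A quick usage sketch (not part of the diff; assumes an active SparkR session). With the redirect above, both spellings call the same JVM function and only the old name warns; the exact warning text comes from base R's .Deprecated and may differ slightly:

df <- createDataFrame(data.frame(gear = c(3, 4, 4, 5)))

# New name, no warning:
head(select(df, approx_count_distinct(df$gear)))

# Old name returns the same estimate but now warns, roughly:
#   'approxCountDistinct' is deprecated. Use 'approx_count_distinct' instead.
head(select(df, approxCountDistinct(df$gear)))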

@@ -1651,7 +1665,22 @@ setMethod("tanh",
setMethod("toDegrees",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc)
.Deprecated("degrees")
jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
column(jc)
})

#' @details
#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle
#' measured in degrees.
#'
#' @rdname column_math_functions
#' @aliases degrees degrees,Column-method
#' @note degrees since 3.0.0
setMethod("degrees",
Member: degrees and radians will need to be added to NAMESPACE file for export

signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
column(jc)
})

@@ -1665,7 +1694,22 @@ setMethod("toDegrees",
setMethod("toRadians",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc)
.Deprecated("radians")
jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
column(jc)
})

#' @details
#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle
#' measured in radians.
#'
#' @rdname column_math_functions
#' @aliases radians radians,Column-method
#' @note radians since 3.0.0
setMethod("radians",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
column(jc)
})
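
A short usage sketch for the two new math helpers (not part of the diff; assumes an active SparkR session):

df <- createDataFrame(data.frame(angle = c(0, pi / 2, pi)))

# Convert radians to degrees and back; the nested call round-trips the values.
head(select(df, degrees(df$angle), radians(degrees(df$angle))))

# toDegrees(df$angle) and toRadians(df$angle) still work in 3.0, but now warn
# that degrees()/radians() should be used instead.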

@@ -2065,13 +2109,24 @@ setMethod("pmod", signature(y = "Column"),

#' @param rsd maximum estimation error allowed (default = 0.05).
#'
#' @rdname column_aggregate_functions
#' @aliases approx_count_distinct,Column-method
#' @note approx_count_distinct(Column, numeric) since 3.0.0
setMethod("approx_count_distinct",
signature(x = "Column"),
function(x, rsd = 0.05) {
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
column(jc)
})
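
rsd is the maximum relative standard deviation allowed in the estimate: a smaller value makes Spark's HyperLogLog++ sketch larger and the count more precise, at the cost of memory. A short sketch (not part of the diff; assumes an active SparkR session):

df <- createDataFrame(data.frame(gear = c(3, 4, 4, 5)))

# Default precision (rsd = 0.05):
head(select(df, approx_count_distinct(df$gear)))

# Tighter estimate, with a larger aggregation buffer:
head(select(df, approx_count_distinct(df$gear, 0.01)))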

#' @rdname column_aggregate_functions
#' @aliases approxCountDistinct,Column-method
#' @note approxCountDistinct(Column, numeric) since 1.4.0
setMethod("approxCountDistinct",
signature(x = "Column"),
function(x, rsd = 0.05) {
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
.Deprecated("approx_count_distinct")
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
column(jc)
})

12 changes: 12 additions & 0 deletions R/pkg/R/generics.R
@@ -746,6 +746,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
#' @name NULL
setGeneric("add_months", function(y, x) { standardGeneric("add_months") })

#' @rdname column_aggregate_functions
#' @name NULL
setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") })
Member Author: @felixcheung might want to check if I'm handling these R changes correctly.

Member: My concern is that these are breaking changes in a version without having them deprecated first... could we leave the old ones in to redirect, and add .Deprecated?

Member Author: I think my comment didn't get connected to this one -- @felixcheung, what do you think about the argument that these were almost surely meant to be deprecated along with their counterparts in Scala/Python? Leaving them in would make this inconsistent. As degrees, radians, and approxCountDistinct are reasonably niche and have a direct replacement that's compatible with older versions, I feel this is OK for 3.0.

Member: I think it's super lightweight to have an approxCountDistinct that calls approx_count_distinct with a deprecation warning. My thought was that the R API has not always been in sync or complete compared to Python, and a breaking API change - i.e. the job will fail - seems a bit drastic even in a major release.

Member Author: It is, but then again that's exactly what was deprecated and removed in Python and Scala. Major versions can have breaking changes. Yes, R isn't always in sync, but that's a bug, not a feature. Let me surface this on dev@ as I think it's going to come up a few more times.
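
A hypothetical SparkR 3.0 session to make the trade-off concrete (the warning and error text are assumptions based on base R's .Deprecated and its standard missing-function error, not output captured from this PR):

df <- createDataFrame(data.frame(gear = c(3, 4, 4, 5)))

# With the redirect kept, as this PR now does, old user code keeps running and warns:
head(select(df, approxCountDistinct(df$gear)))
# Warning: 'approxCountDistinct' is deprecated. Use 'approx_count_distinct' instead.

# If the old generic were removed outright, the same call would instead fail the job:
# Error: could not find function "approxCountDistinct"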


#' @rdname column_aggregate_functions
#' @name NULL
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
@@ -1287,10 +1291,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
#' @name NULL
setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })

#' @rdname column_math_functions
#' @name NULL
setGeneric("degrees", function(x) { standardGeneric("degrees") })

#' @rdname column_math_functions
#' @name NULL
setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })

#' @rdname column_math_functions
#' @name NULL
setGeneric("radians", function(x) { standardGeneric("radians") })

#' @rdname column_math_functions
#' @name NULL
setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
4 changes: 2 additions & 2 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1379,7 +1379,7 @@ test_that("column operators", {

test_that("column functions", {
c <- column("a")
c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
2 changes: 0 additions & 2 deletions core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
@@ -158,8 +158,6 @@ class BarrierTaskContext private[spark] (

override def isInterrupted(): Boolean = taskContext.isInterrupted()

override def isRunningLocally(): Boolean = taskContext.isRunningLocally()

override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
taskContext.addTaskCompletionListener(listener)
this
7 changes: 0 additions & 7 deletions core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -96,13 +96,6 @@ abstract class TaskContext extends Serializable {
*/
def isInterrupted(): Boolean

/**
* Returns true if the task is running locally in the driver program.
* @return false
*/
@deprecated("Local execution was removed, so this always returns false", "2.0.0")
def isRunningLocally(): Boolean

/**
* Adds a (Java friendly) listener to be executed on task completion.
* This will be called in all situations - success, failure, or cancellation. Adding a listener
2 changes: 0 additions & 2 deletions core/src/main/scala/org/apache/spark/TaskContextImpl.scala
@@ -157,8 +157,6 @@ private[spark] class TaskContextImpl(
@GuardedBy("this")
override def isCompleted(): Boolean = synchronized(completed)

override def isRunningLocally(): Boolean = false

override def isInterrupted(): Boolean = reasonIfKilled.isDefined

override def getLocalProperty(key: String): String = localProperties.getProperty(key)
core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
@@ -56,14 +56,4 @@ class ShuffleWriteMetrics private[spark] () extends Serializable {
private[spark] def decRecordsWritten(v: Long): Unit = {
_recordsWritten.setValue(recordsWritten - v)
}

// Legacy methods for backward compatibility.
// TODO: remove these once we make this class private.
@deprecated("use bytesWritten instead", "2.0.0")
def shuffleBytesWritten: Long = bytesWritten
@deprecated("use writeTime instead", "2.0.0")
def shuffleWriteTime: Long = writeTime
@deprecated("use recordsWritten instead", "2.0.0")
def shuffleRecordsWritten: Long = recordsWritten

}
core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala
@@ -47,33 +47,3 @@ case class AccumulableInfo private[spark] (
private[spark] val countFailedValues: Boolean,
// TODO: use this to identify internal task metrics instead of encoding it in the name
private[spark] val metadata: Option[String] = None)


/**
* A collection of deprecated constructors. This will be removed soon.
*/
object AccumulableInfo {

@deprecated("do not create AccumulableInfo", "2.0.0")
def apply(
id: Long,
name: String,
update: Option[String],
value: String,
internal: Boolean): AccumulableInfo = {
new AccumulableInfo(
id, Option(name), update, Option(value), internal, countFailedValues = false)
}

@deprecated("do not create AccumulableInfo", "2.0.0")
def apply(id: Long, name: String, update: Option[String], value: String): AccumulableInfo = {
new AccumulableInfo(
id, Option(name), update, Option(value), internal = false, countFailedValues = false)
}

@deprecated("do not create AccumulableInfo", "2.0.0")
def apply(id: Long, name: String, value: String): AccumulableInfo = {
new AccumulableInfo(
id, Option(name), None, Option(value), internal = false, countFailedValues = false)
}
}
22 changes: 0 additions & 22 deletions mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -47,18 +47,6 @@ import org.apache.spark.util.{Utils, VersionUtils}
private[util] sealed trait BaseReadWrite {
private var optionSparkSession: Option[SparkSession] = None

/**
* Sets the Spark SQLContext to use for saving/loading.
*
* @deprecated Use session instead. This method will be removed in 3.0.0.
*/
@Since("1.6.0")
@deprecated("Use session instead. This method will be removed in 3.0.0.", "2.0.0")
def context(sqlContext: SQLContext): this.type = {
optionSparkSession = Option(sqlContext.sparkSession)
this
}

/**
* Sets the Spark Session to use for saving/loading.
*/
@@ -215,10 +203,6 @@ abstract class MLWriter extends BaseReadWrite with Logging {
// override for Java compatibility
@Since("1.6.0")
override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)

// override for Java compatibility
@Since("1.6.0")
override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
}

/**
@@ -281,9 +265,6 @@ class GeneralMLWriter(stage: PipelineStage) extends MLWriter with Logging {

// override for Java compatibility
override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)

// override for Java compatibility
override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
Contributor: @srowen This public method seems not to have been deprecated before removal, and is available in 2.4.5.

scala> import org.apache.spark.ml.util.GeneralMLWriter
import org.apache.spark.ml.util.GeneralMLWriter

scala> new GeneralMLWriter(null).context(spark.sqlContext)
res0: org.apache.spark.ml.util.GeneralMLWriter = org.apache.spark.ml.util.GeneralMLWriter@26b150cd

There is no deprecation warning above. Does it matter?

Member (@HyukjinKwon, Feb 26, 2020): This seems properly deprecated in MLWriter as its parent:
https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.util.GeneralMLWriter@context(sqlContext:org.apache.spark.sql.SQLContext):GeneralMLWriter.this.type
and the Scaladoc explicitly shows this at GeneralMLWriter too. Seems right to remove together if we should in MLWriter.

Member Author: Yeah it was deprecated in 2.0.0 and marked for removal in 3.0.0.

/**
 * Sets the Spark SQLContext to use for saving/loading.
 *
 * @deprecated Use session instead. This method will be removed in 3.0.0.
 */
@Since("1.6.0")
@deprecated("Use session instead. This method will be removed in 3.0.0.", "2.0.0")

I think ideally the Java overload and subclass overrides would be marked deprecated too, but they implicitly are. If there were a case that this is actually used, we could revive it, but just wondering how often people would be using save + SQLContext?

Contributor: I never use this method, I was just checking it. Thanks!

}

/**
@@ -352,9 +333,6 @@ abstract class MLReader[T] extends BaseReadWrite {

// override for Java compatibility
override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)

// override for Java compatibility
override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
}

/**
mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -134,31 +134,6 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Doubl
@Since("1.1.0")
def fMeasure(label: Double): Double = fMeasure(label, 1.0)

/**
* Returns precision
*/
@Since("1.1.0")
@deprecated("Use accuracy.", "2.0.0")
lazy val precision: Double = accuracy

/**
* Returns recall
* (equals to precision for multiclass classifier
* because sum of all false positives is equal to sum
* of all false negatives)
*/
@Since("1.1.0")
@deprecated("Use accuracy.", "2.0.0")
lazy val recall: Double = accuracy

/**
* Returns f-measure
* (equals to precision and recall because precision equals recall)
*/
@Since("1.1.0")
@deprecated("Use accuracy.", "2.0.0")
lazy val fMeasure: Double = accuracy

/**
* Returns accuracy
* (equals to the total number of correctly classified instances
mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
@@ -77,9 +77,6 @@ class MulticlassMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {

assert(math.abs(metrics.accuracy -
(2.0 + 3.0 + 1.0) / ((2 + 3 + 1) + (1 + 1 + 1))) < delta)
assert(math.abs(metrics.accuracy - metrics.precision) < delta)
assert(math.abs(metrics.accuracy - metrics.recall) < delta)
assert(math.abs(metrics.accuracy - metrics.fMeasure) < delta)
assert(math.abs(metrics.accuracy - metrics.weightedRecall) < delta)
assert(math.abs(metrics.weightedTruePositiveRate -
((4.0 / 9) * tpRate0 + (4.0 / 9) * tpRate1 + (1.0 / 9) * tpRate2)) < delta)
18 changes: 18 additions & 0 deletions project/MimaExcludes.scala
@@ -36,6 +36,24 @@ object MimaExcludes {

// Exclude rules for 3.0.x
lazy val v30excludes = v24excludes ++ Seq(
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.TaskContext.isRunningLocally"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleBytesWritten"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleWriteTime"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleRecordsWritten"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.AccumulableInfo.apply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.approxCountDistinct"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.toRadians"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.toDegrees"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.monotonicallyIncreasingId"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.clearActive"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.getOrCreate"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.setActive"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.SQLContext.this"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.fMeasure"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.recall"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.precision"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLWriter.context"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLReader.context"),
// [SPARK-25737] Remove JavaSparkContextVarargsWorkaround
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.api.java.JavaSparkContext"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.union"),