diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 9d4f05af75afd..de56061b4c1c7 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -194,6 +194,7 @@ exportMethods("%<=>%", "acos", "add_months", "alias", + "approx_count_distinct", "approxCountDistinct", "approxQuantile", "array_contains", @@ -252,6 +253,7 @@ exportMethods("%<=>%", "dayofweek", "dayofyear", "decode", + "degrees", "dense_rank", "desc", "element_at", @@ -334,6 +336,7 @@ exportMethods("%<=>%", "posexplode", "posexplode_outer", "quarter", + "radians", "rand", "randn", "rank", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 9292363d1ad2f..9abb7fc1fadb4 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -112,7 +112,7 @@ NULL #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) #' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp), #' v3 = bround(df$wt, 1), v4 = bin(df$cyl), -#' v5 = hex(df$wt), v6 = toDegrees(df$gear), +#' v5 = hex(df$wt), v6 = degrees(df$gear), #' v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am), #' v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1), #' v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5), @@ -320,23 +320,37 @@ setMethod("acos", }) #' @details -#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group. +#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group. #' #' @rdname column_aggregate_functions -#' @aliases approxCountDistinct approxCountDistinct,Column-method +#' @aliases approx_count_distinct approx_count_distinct,Column-method #' @examples #' #' \dontrun{ -#' head(select(df, approxCountDistinct(df$gear))) -#' head(select(df, approxCountDistinct(df$gear, 0.02))) +#' head(select(df, approx_count_distinct(df$gear))) +#' head(select(df, approx_count_distinct(df$gear, 0.02))) #' head(select(df, countDistinct(df$gear, df$cyl))) #' head(select(df, n_distinct(df$gear))) #' head(distinct(select(df, "gear")))} +#' @note approx_count_distinct(Column) since 3.0.0 +setMethod("approx_count_distinct", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc) + column(jc) + }) + +#' @details +#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group. +#' +#' @rdname column_aggregate_functions +#' @aliases approxCountDistinct approxCountDistinct,Column-method #' @note approxCountDistinct(Column) since 1.4.0 setMethod("approxCountDistinct", signature(x = "Column"), function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc) + .Deprecated("approx_count_distinct") + jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc) column(jc) }) @@ -1651,7 +1665,22 @@ setMethod("tanh", setMethod("toDegrees", signature(x = "Column"), function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc) + .Deprecated("degrees") + jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc) + column(jc) + }) + +#' @details +#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle +#' measured in degrees. 
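+#' As a brief illustration (assuming \code{df} is a SparkDataFrame with a numeric column
+#' \code{rad} holding angles in radians), \code{head(select(df, degrees(df$rad)))} returns the
+#' same angles expressed in degrees, so an input of \code{pi} maps to approximately 180.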
+#' +#' @rdname column_math_functions +#' @aliases degrees degrees,Column-method +#' @note degrees since 3.0.0 +setMethod("degrees", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc) column(jc) }) @@ -1665,7 +1694,22 @@ setMethod("toDegrees", setMethod("toRadians", signature(x = "Column"), function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc) + .Deprecated("radians") + jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc) + column(jc) + }) + +#' @details +#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle +#' measured in radians. +#' +#' @rdname column_math_functions +#' @aliases radians radians,Column-method +#' @note radians since 3.0.0 +setMethod("radians", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc) column(jc) }) @@ -2065,13 +2109,24 @@ setMethod("pmod", signature(y = "Column"), #' @param rsd maximum estimation error allowed (default = 0.05). #' +#' @rdname column_aggregate_functions +#' @aliases approx_count_distinct,Column-method +#' @note approx_count_distinct(Column, numeric) since 3.0.0 +setMethod("approx_count_distinct", + signature(x = "Column"), + function(x, rsd = 0.05) { + jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd) + column(jc) + }) + #' @rdname column_aggregate_functions #' @aliases approxCountDistinct,Column-method #' @note approxCountDistinct(Column, numeric) since 1.4.0 setMethod("approxCountDistinct", signature(x = "Column"), function(x, rsd = 0.05) { - jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd) + .Deprecated("approx_count_distinct") + jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd) column(jc) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 463102c780b52..cbed276274ac1 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -746,6 +746,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy" #' @name NULL setGeneric("add_months", function(y, x) { standardGeneric("add_months") }) +#' @rdname column_aggregate_functions +#' @name NULL +setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") }) + #' @rdname column_aggregate_functions #' @name NULL setGeneric("approxCountDistinct", function(x, ...) 
{ standardGeneric("approxCountDistinct") }) @@ -1287,10 +1291,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst #' @name NULL setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) +#' @rdname column_math_functions +#' @name NULL +setGeneric("degrees", function(x) { standardGeneric("degrees") }) + #' @rdname column_math_functions #' @name NULL setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") }) +#' @rdname column_math_functions +#' @name NULL +setGeneric("radians", function(x) { standardGeneric("radians") }) + #' @rdname column_math_functions #' @name NULL setGeneric("toRadians", function(x) { standardGeneric("toRadians") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index faec387ce4eff..059c9f3057242 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1379,7 +1379,7 @@ test_that("column operators", { test_that("column functions", { c <- column("a") - c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c) + c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c) c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c) c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c) c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c) @@ -1388,7 +1388,7 @@ test_that("column functions", { c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c) c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id() c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c) - c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c) + c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c) c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c) c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c") c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index 7ce421e5479ee..6a497afac444d 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -158,8 +158,6 @@ class BarrierTaskContext private[spark] ( override def isInterrupted(): Boolean = taskContext.isInterrupted() - override def isRunningLocally(): Boolean = taskContext.isRunningLocally() - override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { taskContext.addTaskCompletionListener(listener) this diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index 2b939dabb1105..959f246f3f9f6 100644 --- a/core/src/main/scala/org/apache/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -96,13 +96,6 @@ abstract class TaskContext extends Serializable { */ def isInterrupted(): Boolean - /** - * Returns true if the task is running locally in the driver program. - * @return false - */ - @deprecated("Local execution was removed, so this always returns false", "2.0.0") - def isRunningLocally(): Boolean - /** * Adds a (Java friendly) listener to be executed on task completion. * This will be called in all situations - success, failure, or cancellation. 
Adding a listener diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index 89730424e5acf..76296c5d0abd3 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -157,8 +157,6 @@ private[spark] class TaskContextImpl( @GuardedBy("this") override def isCompleted(): Boolean = synchronized(completed) - override def isRunningLocally(): Boolean = false - override def isInterrupted(): Boolean = reasonIfKilled.isDefined override def getLocalProperty(key: String): String = localProperties.getProperty(key) diff --git a/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala index ada2e1bc08593..0c9da657c2b60 100644 --- a/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala @@ -56,14 +56,4 @@ class ShuffleWriteMetrics private[spark] () extends Serializable { private[spark] def decRecordsWritten(v: Long): Unit = { _recordsWritten.setValue(recordsWritten - v) } - - // Legacy methods for backward compatibility. - // TODO: remove these once we make this class private. - @deprecated("use bytesWritten instead", "2.0.0") - def shuffleBytesWritten: Long = bytesWritten - @deprecated("use writeTime instead", "2.0.0") - def shuffleWriteTime: Long = writeTime - @deprecated("use recordsWritten instead", "2.0.0") - def shuffleRecordsWritten: Long = recordsWritten - } diff --git a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala index d745345f4e0d2..bd0fe90b1f3b6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala @@ -47,33 +47,3 @@ case class AccumulableInfo private[spark] ( private[spark] val countFailedValues: Boolean, // TODO: use this to identify internal task metrics instead of encoding it in the name private[spark] val metadata: Option[String] = None) - - -/** - * A collection of deprecated constructors. This will be removed soon. 
- */ -object AccumulableInfo { - - @deprecated("do not create AccumulableInfo", "2.0.0") - def apply( - id: Long, - name: String, - update: Option[String], - value: String, - internal: Boolean): AccumulableInfo = { - new AccumulableInfo( - id, Option(name), update, Option(value), internal, countFailedValues = false) - } - - @deprecated("do not create AccumulableInfo", "2.0.0") - def apply(id: Long, name: String, update: Option[String], value: String): AccumulableInfo = { - new AccumulableInfo( - id, Option(name), update, Option(value), internal = false, countFailedValues = false) - } - - @deprecated("do not create AccumulableInfo", "2.0.0") - def apply(id: Long, name: String, value: String): AccumulableInfo = { - new AccumulableInfo( - id, Option(name), None, Option(value), internal = false, countFailedValues = false) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index 72a60e04360d6..a0ac26a34d8c8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -47,18 +47,6 @@ import org.apache.spark.util.{Utils, VersionUtils} private[util] sealed trait BaseReadWrite { private var optionSparkSession: Option[SparkSession] = None - /** - * Sets the Spark SQLContext to use for saving/loading. - * - * @deprecated Use session instead. This method will be removed in 3.0.0. - */ - @Since("1.6.0") - @deprecated("Use session instead. This method will be removed in 3.0.0.", "2.0.0") - def context(sqlContext: SQLContext): this.type = { - optionSparkSession = Option(sqlContext.sparkSession) - this - } - /** * Sets the Spark Session to use for saving/loading. */ @@ -215,10 +203,6 @@ abstract class MLWriter extends BaseReadWrite with Logging { // override for Java compatibility @Since("1.6.0") override def session(sparkSession: SparkSession): this.type = super.session(sparkSession) - - // override for Java compatibility - @Since("1.6.0") - override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession) } /** @@ -281,9 +265,6 @@ class GeneralMLWriter(stage: PipelineStage) extends MLWriter with Logging { // override for Java compatibility override def session(sparkSession: SparkSession): this.type = super.session(sparkSession) - - // override for Java compatibility - override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession) } /** @@ -352,9 +333,6 @@ abstract class MLReader[T] extends BaseReadWrite { // override for Java compatibility override def session(sparkSession: SparkSession): this.type = super.session(sparkSession) - - // override for Java compatibility - override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 9a6a8dbdccbf3..980e0c92531a2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -134,31 +134,6 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Doubl @Since("1.1.0") def fMeasure(label: Double): Double = fMeasure(label, 1.0) - /** - * Returns precision - */ - @Since("1.1.0") - @deprecated("Use accuracy.", "2.0.0") - lazy val precision: Double = accuracy - - /** - * 
Returns recall - * (equals to precision for multiclass classifier - * because sum of all false positives is equal to sum - * of all false negatives) - */ - @Since("1.1.0") - @deprecated("Use accuracy.", "2.0.0") - lazy val recall: Double = accuracy - - /** - * Returns f-measure - * (equals to precision and recall because precision equals recall) - */ - @Since("1.1.0") - @deprecated("Use accuracy.", "2.0.0") - lazy val fMeasure: Double = accuracy - /** * Returns accuracy * (equals to the total number of correctly classified instances diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index 142d1e9812ef1..5394baab94bcf 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -77,9 +77,6 @@ class MulticlassMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { assert(math.abs(metrics.accuracy - (2.0 + 3.0 + 1.0) / ((2 + 3 + 1) + (1 + 1 + 1))) < delta) - assert(math.abs(metrics.accuracy - metrics.precision) < delta) - assert(math.abs(metrics.accuracy - metrics.recall) < delta) - assert(math.abs(metrics.accuracy - metrics.fMeasure) < delta) assert(math.abs(metrics.accuracy - metrics.weightedRecall) < delta) assert(math.abs(metrics.weightedTruePositiveRate - ((4.0 / 9) * tpRate0 + (4.0 / 9) * tpRate1 + (1.0 / 9) * tpRate2)) < delta) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 350d8ad6942ff..b6bd6b82d94fd 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,24 @@ object MimaExcludes { // Exclude rules for 3.0.x lazy val v30excludes = v24excludes ++ Seq( + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.TaskContext.isRunningLocally"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleBytesWritten"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleWriteTime"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.shuffleRecordsWritten"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.AccumulableInfo.apply"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.approxCountDistinct"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.toRadians"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.toDegrees"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.monotonicallyIncreasingId"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.clearActive"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.getOrCreate"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.setActive"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.SQLContext.this"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.fMeasure"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.recall"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.precision"), + 
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLWriter.context"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLReader.context"), // [SPARK-25737] Remove JavaSparkContextVarargsWorkaround ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.api.java.JavaSparkContext"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.union"), diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 0bb0ca37c1ab6..b171e46871fdf 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -229,46 +229,28 @@ def falsePositiveRate(self, label): return self.call("falsePositiveRate", label) @since('1.4.0') - def precision(self, label=None): + def precision(self, label): """ - Returns precision or precision for a given label (category) if specified. + Returns precision. """ - if label is None: - # note:: Deprecated in 2.0.0. Use accuracy. - warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning) - return self.call("precision") - else: - return self.call("precision", float(label)) + return self.call("precision", float(label)) @since('1.4.0') - def recall(self, label=None): + def recall(self, label): """ - Returns recall or recall for a given label (category) if specified. + Returns recall. """ - if label is None: - # note:: Deprecated in 2.0.0. Use accuracy. - warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning) - return self.call("recall") - else: - return self.call("recall", float(label)) + return self.call("recall", float(label)) @since('1.4.0') - def fMeasure(self, label=None, beta=None): + def fMeasure(self, label, beta=None): """ - Returns f-measure or f-measure for a given label (category) if specified. + Returns f-measure. """ if beta is None: - if label is None: - # note:: Deprecated in 2.0.0. Use accuracy. - warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning) - return self.call("fMeasure") - else: - return self.call("fMeasure", label) + return self.call("fMeasure", label) else: - if label is None: - raise Exception("If the beta parameter is specified, label can not be none") - else: - return self.call("fMeasure", label, beta) + return self.call("fMeasure", label, beta) @property @since('2.0.0') diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index fc7809387b13a..51f20db2927e2 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -38,12 +38,10 @@ class MLUtils(object): """ @staticmethod - def _parse_libsvm_line(line, multiclass=None): + def _parse_libsvm_line(line): """ Parses a line in LIBSVM format into (label, indices, values). """ - if multiclass is not None: - warnings.warn("deprecated", DeprecationWarning) items = line.split(None) label = float(items[0]) nnz = len(items) - 1 @@ -73,7 +71,7 @@ def _convert_labeled_point_to_libsvm(p): @staticmethod @since("1.0.0") - def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None, multiclass=None): + def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): """ Loads labeled data in the LIBSVM format into an RDD of LabeledPoint. 
The LIBSVM format is a text-based format used by @@ -116,8 +114,6 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None, multiclass=None LabeledPoint(-1.0, (6,[1,3,5],[4.0,5.0,6.0])) """ from pyspark.mllib.regression import LabeledPoint - if multiclass is not None: - warnings.warn("deprecated", DeprecationWarning) lines = sc.textFile(path, minPartitions) parsed = lines.map(lambda l: MLUtils._parse_libsvm_line(l)) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index bf6b990487617..5748f6c6bd5eb 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -119,25 +119,6 @@ def toJSON(self, use_unicode=True): rdd = self._jdf.toJSON() return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) - @since(1.3) - def registerTempTable(self, name): - """Registers this DataFrame as a temporary table using the given name. - - The lifetime of this temporary table is tied to the :class:`SparkSession` - that was used to create this :class:`DataFrame`. - - >>> df.registerTempTable("people") - >>> df2 = spark.sql("select * from people") - >>> sorted(df.collect()) == sorted(df2.collect()) - True - >>> spark.catalog.dropTempView("people") - - .. note:: Deprecated in 2.0, use createOrReplaceTempView instead. - """ - warnings.warn( - "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning) - self._jdf.createOrReplaceTempView(name) - @since(2.0) def createTempView(self, name): """Creates a local temporary view with this DataFrame. @@ -1462,20 +1443,6 @@ def union(self, other): """ return DataFrame(self._jdf.union(other._jdf), self.sql_ctx) - @since(1.3) - def unionAll(self, other): - """ Return a new :class:`DataFrame` containing union of rows in this and another frame. - - This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union - (that does deduplication of elements), use this function followed by :func:`distinct`. - - Also as standard in SQL, this function resolves columns by position (not by name). - - .. note:: Deprecated in 2.0, use :func:`union` instead. - """ - warnings.warn("Deprecated in 2.0, use union instead.", DeprecationWarning) - return self.union(other) - @since(2.3) def unionByName(self, other): """ Returns a new :class:`DataFrame` containing union of rows in this and another frame. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 24824efb47362..e86749cc15c35 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -252,8 +252,6 @@ def _(): # Wraps deprecated functions (keys) with the messages (values). _functions_deprecated = { - 'toDegrees': 'Deprecated in 2.1, use degrees instead.', - 'toRadians': 'Deprecated in 2.1, use radians instead.', } for _name, _doc in _functions.items(): @@ -275,15 +273,6 @@ def _(): del _name, _doc -@since(1.3) -def approxCountDistinct(col, rsd=None): - """ - .. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead. 
- """ - warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning) - return approx_count_distinct(col, rsd) - - @since(2.1) def approx_count_distinct(col, rsd=None): """Aggregate function: returns a new :class:`Column` for approximate distinct count of diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index ad04270c1a361..ea0269162d62a 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1610,7 +1610,7 @@ def test_aggregator(self): from pyspark.sql import functions self.assertEqual((0, u'99'), tuple(g.agg(functions.first(df.key), functions.last(df.value)).first())) - self.assertTrue(95 < g.agg(functions.approxCountDistinct(df.key)).first()[0]) + self.assertTrue(95 < g.agg(functions.approx_count_distinct(df.key)).first()[0]) self.assertEqual(100, g.agg(functions.countDistinct(df.value)).first()[0]) def test_first_last_ignorenulls(self): diff --git a/python/pyspark/storagelevel.py b/python/pyspark/storagelevel.py index 7f29646c07432..951af45bb3227 100644 --- a/python/pyspark/storagelevel.py +++ b/python/pyspark/storagelevel.py @@ -56,16 +56,3 @@ def __str__(self): StorageLevel.MEMORY_AND_DISK = StorageLevel(True, True, False, False) StorageLevel.MEMORY_AND_DISK_2 = StorageLevel(True, True, False, False, 2) StorageLevel.OFF_HEAP = StorageLevel(True, True, True, False, 1) - -""" -.. note:: The following four storage level constants are deprecated in 2.0, since the records - will always be serialized in Python. -""" -StorageLevel.MEMORY_ONLY_SER = StorageLevel.MEMORY_ONLY -""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_ONLY`` instead.""" -StorageLevel.MEMORY_ONLY_SER_2 = StorageLevel.MEMORY_ONLY_2 -""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_ONLY_2`` instead.""" -StorageLevel.MEMORY_AND_DISK_SER = StorageLevel.MEMORY_AND_DISK -""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_AND_DISK`` instead.""" -StorageLevel.MEMORY_AND_DISK_SER_2 = StorageLevel.MEMORY_AND_DISK_2 -""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_AND_DISK_2`` instead.""" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index 3a094079380fd..48105571b2798 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -246,7 +246,7 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("toDegrees") { testUnary(ToDegrees, math.toDegrees) - checkConsistencyBetweenInterpretedAndCodegen(Acos, DoubleType) + checkConsistencyBetweenInterpretedAndCodegen(ToDegrees, DoubleType) } test("toRadians") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index a046127c3edb4..a9a19aa8a1001 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -302,24 +302,6 @@ class Column(val expr: Expression) extends Logging { */ def =!= (other: Any): Column = withExpr{ Not(EqualTo(expr, lit(other).expr)) } - /** - * Inequality test. 
- * {{{ - * // Scala: - * df.select( df("colA") !== df("colB") ) - * df.select( !(df("colA") === df("colB")) ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.filter( col("colA").notEqual(col("colB")) ); - * }}} - * - * @group expr_ops - * @since 1.3.0 - */ - @deprecated("!== does not have the same precedence as ===, use =!= instead", "2.0.0") - def !== (other: Any): Column = this =!= other - /** * Inequality test. * {{{ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c91b0d778fab1..33d6dfc45bede 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1810,20 +1810,6 @@ class Dataset[T] private[sql]( Limit(Literal(n), logicalPlan) } - /** - * Returns a new Dataset containing union of rows in this Dataset and another Dataset. - * - * This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union (that does - * deduplication of elements), use this function followed by a [[distinct]]. - * - * Also as standard in SQL, this function resolves columns by position (not by name). - * - * @group typedrel - * @since 2.0.0 - */ - @deprecated("use union()", "2.0.0") - def unionAll(other: Dataset[T]): Dataset[T] = union(other) - /** * Returns a new Dataset containing union of rows in this Dataset and another Dataset. * @@ -2123,90 +2109,6 @@ class Dataset[T] private[sql]( randomSplit(weights.toArray, seed) } - /** - * (Scala-specific) Returns a new Dataset where each row has been expanded to zero or more - * rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of - * the input row are implicitly joined with each row that is output by the function. - * - * Given that this is deprecated, as an alternative, you can explode columns either using - * `functions.explode()` or `flatMap()`. The following example uses these alternatives to count - * the number of books that contain a given word: - * - * {{{ - * case class Book(title: String, words: String) - * val ds: Dataset[Book] - * - * val allWords = ds.select('title, explode(split('words, " ")).as("word")) - * - * val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title")) - * }}} - * - * Using `flatMap()` this can similarly be exploded as: - * - * {{{ - * ds.flatMap(_.words.split(" ")) - * }}} - * - * @group untypedrel - * @since 2.0.0 - */ - @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0") - def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = { - val elementSchema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] - - val convert = CatalystTypeConverters.createToCatalystConverter(elementSchema) - - val rowFunction = - f.andThen(_.map(convert(_).asInstanceOf[InternalRow])) - val generator = UserDefinedGenerator(elementSchema, rowFunction, input.map(_.expr)) - - withPlan { - Generate(generator, unrequiredChildIndex = Nil, outer = false, - qualifier = None, generatorOutput = Nil, logicalPlan) - } - } - - /** - * (Scala-specific) Returns a new Dataset where a single column has been expanded to zero - * or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All - * columns of the input row are implicitly joined with each value that is output by the function. 
- * - * Given that this is deprecated, as an alternative, you can explode columns either using - * `functions.explode()`: - * - * {{{ - * ds.select(explode(split('words, " ")).as("word")) - * }}} - * - * or `flatMap()`: - * - * {{{ - * ds.flatMap(_.words.split(" ")) - * }}} - * - * @group untypedrel - * @since 2.0.0 - */ - @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0") - def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B]) - : DataFrame = { - val dataType = ScalaReflection.schemaFor[B].dataType - val attributes = AttributeReference(outputColumn, dataType)() :: Nil - // TODO handle the metadata? - val elementSchema = attributes.toStructType - - def rowFunction(row: Row): TraversableOnce[InternalRow] = { - val convert = CatalystTypeConverters.createToCatalystConverter(dataType) - f(row(0).asInstanceOf[A]).map(o => InternalRow(convert(o))) - } - val generator = UserDefinedGenerator(elementSchema, rowFunction, apply(inputColumn).expr :: Nil) - - withPlan { - Generate(generator, unrequiredChildIndex = Nil, outer = false, - qualifier = None, generatorOutput = Nil, logicalPlan) - } - } - /** * Returns a new Dataset by adding a column or replacing the existing column that has * the same name. @@ -3054,18 +2956,6 @@ class Dataset[T] private[sql]( */ def javaRDD: JavaRDD[T] = toJavaRDD - /** - * Registers this Dataset as a temporary table using the given name. The lifetime of this - * temporary table is tied to the [[SparkSession]] that was used to create this Dataset. - * - * @group basic - * @since 1.6.0 - */ - @deprecated("Use createOrReplaceTempView(viewName) instead.", "2.0.0") - def registerTempTable(tableName: String): Unit = { - createOrReplaceTempView(tableName) - } - /** * Creates a local temporary view using the given name. The lifetime of this * temporary view is tied to the [[SparkSession]] that was used to create this Dataset. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 1b7e969a7192e..9982b60fefe60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -24,7 +24,7 @@ import scala.reflect.runtime.universe.TypeTag import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, InterfaceStability} -import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} +import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.rdd.RDD @@ -64,15 +64,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) // Note: Since Spark 2.0 this class has become a wrapper of SparkSession, where the // real functionality resides. This class remains mainly for backward compatibility. - - @deprecated("Use SparkSession.builder instead", "2.0.0") - def this(sc: SparkContext) = { - this(SparkSession.builder().sparkContext(sc).getOrCreate()) - } - - @deprecated("Use SparkSession.builder instead", "2.0.0") - def this(sparkContext: JavaSparkContext) = this(sparkContext.sc) - // TODO: move this logic into SparkSession private[sql] def sessionState: SessionState = sparkSession.sessionState @@ -767,45 +758,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) */ object SQLContext { - /** - * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. 
- * - * This function can be used to create a singleton SQLContext object that can be shared across - * the JVM. - * - * If there is an active SQLContext for current thread, it will be returned instead of the global - * one. - * - * @since 1.5.0 - */ - @deprecated("Use SparkSession.builder instead", "2.0.0") - def getOrCreate(sparkContext: SparkContext): SQLContext = { - SparkSession.builder().sparkContext(sparkContext).getOrCreate().sqlContext - } - - /** - * Changes the SQLContext that will be returned in this thread and its children when - * SQLContext.getOrCreate() is called. This can be used to ensure that a given thread receives - * a SQLContext with an isolated session, instead of the global (first created) context. - * - * @since 1.6.0 - */ - @deprecated("Use SparkSession.setActiveSession instead", "2.0.0") - def setActive(sqlContext: SQLContext): Unit = { - SparkSession.setActiveSession(sqlContext.sparkSession) - } - - /** - * Clears the active SQLContext for current thread. Subsequent calls to getOrCreate will - * return the first created context instead of a thread-local override. - * - * @since 1.6.0 - */ - @deprecated("Use SparkSession.clearActiveSession instead", "2.0.0") - def clearActive(): Unit = { - SparkSession.clearActiveSession() - } - /** * Converts an iterator of Java Beans to InternalRow using the provided * bean info & schema. This is not related to the singleton, but is a static diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala index 650ffd4586592..3e637d594caf3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala @@ -80,7 +80,7 @@ object typed { // TODO: // stddevOf: Double // varianceOf: Double - // approxCountDistinct: Long + // approx_count_distinct: Long // minOf: T // maxOf: T diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 6bb1a490d8c3a..b2a6e22cbfc86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -206,36 +206,6 @@ object functions { // Aggregate functions ////////////////////////////////////////////////////////////////////////////////////////////// - /** - * @group agg_funcs - * @since 1.3.0 - */ - @deprecated("Use approx_count_distinct", "2.1.0") - def approxCountDistinct(e: Column): Column = approx_count_distinct(e) - - /** - * @group agg_funcs - * @since 1.3.0 - */ - @deprecated("Use approx_count_distinct", "2.1.0") - def approxCountDistinct(columnName: String): Column = approx_count_distinct(columnName) - - /** - * @group agg_funcs - * @since 1.3.0 - */ - @deprecated("Use approx_count_distinct", "2.1.0") - def approxCountDistinct(e: Column, rsd: Double): Column = approx_count_distinct(e, rsd) - - /** - * @group agg_funcs - * @since 1.3.0 - */ - @deprecated("Use approx_count_distinct", "2.1.0") - def approxCountDistinct(columnName: String, rsd: Double): Column = { - approx_count_distinct(Column(columnName), rsd) - } - /** * Aggregate function: returns the approximate number of distinct items in a group. * @@ -1114,27 +1084,6 @@ object functions { */ def isnull(e: Column): Column = withExpr { IsNull(e.expr) } - /** - * A column expression that generates monotonically increasing 64-bit integers. 
- * - * The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. - * The current implementation puts the partition ID in the upper 31 bits, and the record number - * within each partition in the lower 33 bits. The assumption is that the data frame has - * less than 1 billion partitions, and each partition has less than 8 billion records. - * - * As an example, consider a `DataFrame` with two partitions, each with 3 records. - * This expression would return the following IDs: - * - * {{{ - * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. - * }}} - * - * @group normal_funcs - * @since 1.4.0 - */ - @deprecated("Use monotonically_increasing_id()", "2.0.0") - def monotonicallyIncreasingId(): Column = monotonically_increasing_id() - /** * A column expression that generates monotonically increasing 64-bit integers. * @@ -2116,20 +2065,6 @@ object functions { */ def tanh(columnName: String): Column = tanh(Column(columnName)) - /** - * @group math_funcs - * @since 1.4.0 - */ - @deprecated("Use degrees", "2.1.0") - def toDegrees(e: Column): Column = degrees(e) - - /** - * @group math_funcs - * @since 1.4.0 - */ - @deprecated("Use degrees", "2.1.0") - def toDegrees(columnName: String): Column = degrees(Column(columnName)) - /** * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * @@ -2152,20 +2087,6 @@ object functions { */ def degrees(columnName: String): Column = degrees(Column(columnName)) - /** - * @group math_funcs - * @since 1.4.0 - */ - @deprecated("Use radians", "2.1.0") - def toRadians(e: Column): Column = radians(e) - - /** - * @group math_funcs - * @since 1.4.0 - */ - @deprecated("Use radians", "2.1.0") - def toRadians(columnName: String): Column = radians(Column(columnName)) - /** * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. 
* diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 4afae56ecdb76..edde9bfd088cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -220,31 +220,6 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { dfAlias.col("t2.c") } - test("simple explode") { - val df = Seq(Tuple1("a b c"), Tuple1("d e")).toDF("words") - - checkAnswer( - df.explode("words", "word") { word: String => word.split(" ").toSeq }.select('word), - Row("a") :: Row("b") :: Row("c") :: Row("d") ::Row("e") :: Nil - ) - } - - test("explode") { - val df = Seq((1, "a b c"), (2, "a b"), (3, "a")).toDF("number", "letters") - val df2 = - df.explode('letters) { - case Row(letters: String) => letters.split(" ").map(Tuple1(_)).toSeq - } - - checkAnswer( - df2 - .select('_1 as 'letter, 'number) - .groupBy('letter) - .agg(countDistinct('number)), - Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil - ) - } - test("Star Expansion - CreateStruct and CreateArray") { val structDf = testData2.select("a", "b").as("record") // CreateStruct and CreateArray in aggregateExpressions @@ -280,24 +255,18 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } test("Star Expansion - explode should fail with a meaningful message if it takes a star") { - val df = Seq(("1", "1,2"), ("2", "4"), ("3", "7,8,9")).toDF("prefix", "csv") + val df = Seq(("1,2"), ("4"), ("7,8,9")).toDF("csv") val e = intercept[AnalysisException] { - df.explode($"*") { case Row(prefix: String, csv: String) => - csv.split(",").map(v => Tuple1(prefix + ":" + v)).toSeq - }.queryExecution.assertAnalyzed() - } - assert(e.getMessage.contains("Invalid usage of '*' in explode/json_tuple/UDTF")) - - checkAnswer( - df.explode('prefix, 'csv) { case Row(prefix: String, csv: String) => - csv.split(",").map(v => Tuple1(prefix + ":" + v)).toSeq - }, - Row("1", "1,2", "1:1") :: - Row("1", "1,2", "1:2") :: - Row("2", "4", "2:4") :: - Row("3", "7,8,9", "3:7") :: - Row("3", "7,8,9", "3:8") :: - Row("3", "7,8,9", "3:9") :: Nil) + df.select(explode($"*")) + } + assert(e.getMessage.contains("Invalid usage of '*' in expression 'explode'")) + } + + test("explode on output of array-valued function") { + val df = Seq(("1,2"), ("4"), ("7,8,9")).toDF("csv") + checkAnswer( + df.select(explode(split($"csv", ","))), + Row("1") :: Row("2") :: Row("4") :: Row("7") :: Row("8") :: Row("9") :: Nil) } test("Star Expansion - explode alias and star") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index a1799829932b8..aab2ae4afc7f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -24,32 +24,14 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType} -@deprecated("This suite is deprecated to silent compiler deprecation warnings", "2.0.0") class SQLContextSuite extends SparkFunSuite with SharedSparkContext { object DummyRule extends Rule[LogicalPlan] { def apply(p: LogicalPlan): LogicalPlan = p } - test("getOrCreate instantiates SQLContext") { - val sqlContext = SQLContext.getOrCreate(sc) - assert(sqlContext != null, "SQLContext.getOrCreate returned null") - 
assert(SQLContext.getOrCreate(sc).eq(sqlContext), - "SQLContext created by SQLContext.getOrCreate not returned by SQLContext.getOrCreate") - } - - test("getOrCreate return the original SQLContext") { - val sqlContext = SQLContext.getOrCreate(sc) - val newSession = sqlContext.newSession() - assert(SQLContext.getOrCreate(sc).eq(sqlContext), - "SQLContext.getOrCreate after explicitly created SQLContext did not return the context") - SparkSession.setActiveSession(newSession.sparkSession) - assert(SQLContext.getOrCreate(sc).eq(newSession), - "SQLContext.getOrCreate after explicitly setActive() did not return the active context") - } - test("Sessions of SQLContext") { - val sqlContext = SQLContext.getOrCreate(sc) + val sqlContext = SparkSession.builder().sparkContext(sc).getOrCreate().sqlContext val session1 = sqlContext.newSession() val session2 = sqlContext.newSession() @@ -77,13 +59,13 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { } test("Catalyst optimization passes are modifiable at runtime") { - val sqlContext = SQLContext.getOrCreate(sc) + val sqlContext = SparkSession.builder().sparkContext(sc).getOrCreate().sqlContext sqlContext.experimental.extraOptimizations = Seq(DummyRule) assert(sqlContext.sessionState.optimizer.batches.flatMap(_.rules).contains(DummyRule)) } test("get all tables") { - val sqlContext = SQLContext.getOrCreate(sc) + val sqlContext = SparkSession.builder().sparkContext(sc).getOrCreate().sqlContext val df = sqlContext.range(10) df.createOrReplaceTempView("listtablessuitetable") assert( @@ -100,7 +82,7 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { } test("getting all tables with a database name has no impact on returned table names") { - val sqlContext = SQLContext.getOrCreate(sc) + val sqlContext = SparkSession.builder().sparkContext(sc).getOrCreate().sqlContext val df = sqlContext.range(10) df.createOrReplaceTempView("listtablessuitetable") assert( @@ -117,7 +99,7 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { } test("query the returned DataFrame of tables") { - val sqlContext = SQLContext.getOrCreate(sc) + val sqlContext = SparkSession.builder().sparkContext(sc).getOrCreate().sqlContext val df = sqlContext.range(10) df.createOrReplaceTempView("listtablessuitetable") @@ -127,7 +109,7 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { StructField("isTemporary", BooleanType, false) :: Nil) Seq(sqlContext.tables(), sqlContext.sql("SHOW TABLes")).foreach { - case tableDF => + tableDF => assert(expectedSchema === tableDF.schema) tableDF.createOrReplaceTempView("tables")