From fa92cc10df02345f114c33ccb9cd7846a94236f9 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 23 Jul 2016 13:47:20 -0700 Subject: [PATCH 1/4] remove deprecated functions, and back compat dispatching for omitting sparkContext or sqlContext parameters --- R/pkg/R/DataFrame.R | 53 ------------ R/pkg/R/SQLContext.R | 200 ++++--------------------------------------- R/pkg/R/sparkR.R | 145 +------------------------------ 3 files changed, 18 insertions(+), 380 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 2e99aa026da55..b1c1984743d54 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -483,34 +483,6 @@ setMethod("createOrReplaceTempView", invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) -#' (Deprecated) Register Temporary Table -#' -#' Registers a SparkDataFrame as a Temporary Table in the SparkSession -#' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table -#' -#' @family SparkDataFrame functions -#' @seealso \link{createOrReplaceTempView} -#' @rdname registerTempTable-deprecated -#' @name registerTempTable -#' @aliases registerTempTable,SparkDataFrame,character-method -#' @export -#' @examples -#'\dontrun{ -#' sparkR.session() -#' path <- "path/to/file.json" -#' df <- read.json(path) -#' registerTempTable(df, "json_df") -#' new_df <- sql("SELECT * FROM json_df") -#'} -#' @note registerTempTable since 1.4.0 -setMethod("registerTempTable", - signature(x = "SparkDataFrame", tableName = "character"), - function(x, tableName) { - .Deprecated("createOrReplaceTempView") - invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName)) - }) - #' insertInto #' #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession. @@ -792,18 +764,6 @@ setMethod("write.parquet", invisible(callJMethod(write, "parquet", path)) }) -#' @rdname write.parquet -#' @name saveAsParquetFile -#' @aliases saveAsParquetFile,SparkDataFrame,character-method -#' @export -#' @note saveAsParquetFile since 1.4.0 -setMethod("saveAsParquetFile", - signature(x = "SparkDataFrame", path = "character"), - function(x, path) { - .Deprecated("write.parquet") - write.parquet(x, path) - }) - #' Save the content of SparkDataFrame in a text file at the specified path. #' #' Save the content of the SparkDataFrame in a text file at the specified path. @@ -2448,19 +2408,6 @@ setMethod("union", dataFrame(unioned) }) -#' unionAll is deprecated - use union instead -#' @rdname union -#' @name unionAll -#' @aliases unionAll,SparkDataFrame,SparkDataFrame-method -#' @export -#' @note unionAll since 1.4.0 -setMethod("unionAll", - signature(x = "SparkDataFrame", y = "SparkDataFrame"), - function(x, y) { - .Deprecated("union") - union(x, y) - }) - #' Union two or more SparkDataFrames #' #' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL. diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index a14bcd91b3eac..fb73f2f1bccc9 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -37,37 +37,6 @@ getInternalType <- function(x) { stop(paste("Unsupported type for SparkDataFrame:", class(x)))) } -#' Temporary function to reroute old S3 Method call to new -#' This function is specifically implemented to remove SQLContext from the parameter list. -#' It determines the target to route the call by checking the parent of this callsite (say 'func'). -#' The target should be called 'func.default'. -#' We need to check the class of x to ensure it is SQLContext/HiveContext before dispatching. -#' @param newFuncSig name of the function the user should call instead in the deprecation message -#' @param x the first parameter of the original call -#' @param ... the rest of parameter to pass along -#' @return whatever the target returns -#' @noRd -dispatchFunc <- function(newFuncSig, x, ...) { - # When called with SparkR::createDataFrame, sys.call()[[1]] returns c(::, SparkR, createDataFrame) - callsite <- as.character(sys.call(sys.parent())[[1]]) - funcName <- callsite[[length(callsite)]] - f <- get(paste0(funcName, ".default")) - # Strip sqlContext from list of parameters and then pass the rest along. - contextNames <- c("org.apache.spark.sql.SQLContext", - "org.apache.spark.sql.hive.HiveContext", - "org.apache.spark.sql.hive.test.TestHiveContext", - "org.apache.spark.sql.SparkSession") - if (missing(x) && length(list(...)) == 0) { - f() - } else if (class(x) == "jobj" && - any(grepl(paste(contextNames, collapse = "|"), getClassName.jobj(x)))) { - .Deprecated(newFuncSig, old = paste0(funcName, "(sqlContext...)")) - f(...) - } else { - f(x, ...) - } -} - #' return the SparkSession #' @noRd getSparkSession <- function() { @@ -178,10 +147,9 @@ getDefaultSqlSource <- function() { #' df3 <- createDataFrame(iris) #' } #' @name createDataFrame -#' @method createDataFrame default #' @note createDataFrame since 1.4.0 # TODO(davies): support sampling and infer type from NA -createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0) { sparkSession <- getSparkSession() if (is.data.frame(data)) { # get the names of columns, they will be put into RDD @@ -256,26 +224,14 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { dataFrame(sdf) } -createDataFrame <- function(x, ...) { - dispatchFunc("createDataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...) -} - #' @rdname createDataFrame #' @aliases createDataFrame #' @export -#' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 -as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +as.DataFrame <- function(data, schema = NULL, samplingRatio = 1.0) { createDataFrame(data, schema, samplingRatio) } -#' @rdname createDataFrame -#' @aliases as.DataFrame -#' @export -as.DataFrame <- function(x, ...) { - dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...) -} - #' toDF #' #' Converts an RDD to a SparkDataFrame by infer the types. @@ -314,9 +270,8 @@ setMethod("toDF", signature(x = "RDD"), #' df <- jsonFile(path) #' } #' @name read.json -#' @method read.json default #' @note read.json since 1.6.0 -read.json.default <- function(path) { +read.json <- function(path) { sparkSession <- getSparkSession() # Allow the user to have a more flexible definiton of the text file path paths <- as.list(suppressWarnings(normalizePath(path))) @@ -325,56 +280,6 @@ read.json.default <- function(path) { dataFrame(sdf) } -read.json <- function(x, ...) { - dispatchFunc("read.json(path)", x, ...) -} - -#' @rdname read.json -#' @name jsonFile -#' @export -#' @method jsonFile default -#' @note jsonFile since 1.4.0 -jsonFile.default <- function(path) { - .Deprecated("read.json") - read.json(path) -} - -jsonFile <- function(x, ...) { - dispatchFunc("jsonFile(path)", x, ...) -} - -#' JSON RDD -#' -#' Loads an RDD storing one JSON object per string as a SparkDataFrame. -#' -#' @param sqlContext SQLContext to use -#' @param rdd An RDD of JSON string -#' @param schema A StructType object to use as schema -#' @param samplingRatio The ratio of simpling used to infer the schema -#' @return A SparkDataFrame -#' @noRd -#' @examples -#'\dontrun{ -#' sparkR.session() -#' rdd <- texFile(sc, "path/to/json") -#' df <- jsonRDD(sqlContext, rdd) -#'} - -# TODO: remove - this method is no longer exported -# TODO: support schema -jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) { - .Deprecated("read.json") - rdd <- serializeToString(rdd) - if (is.null(schema)) { - read <- callJMethod(sqlContext, "read") - # samplingRatio is deprecated - sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd")) - dataFrame(sdf) - } else { - stop("not implemented") - } -} - #' Create a SparkDataFrame from an ORC file. #' #' Loads an ORC file, returning the result as a SparkDataFrame. @@ -403,9 +308,8 @@ read.orc <- function(path) { #' @rdname read.parquet #' @export #' @name read.parquet -#' @method read.parquet default #' @note read.parquet since 1.6.0 -read.parquet.default <- function(path) { +read.parquet <- function(path) { sparkSession <- getSparkSession() # Allow the user to have a more flexible definiton of the Parquet file path paths <- as.list(suppressWarnings(normalizePath(path))) @@ -414,24 +318,6 @@ read.parquet.default <- function(path) { dataFrame(sdf) } -read.parquet <- function(x, ...) { - dispatchFunc("read.parquet(...)", x, ...) -} - -#' @rdname read.parquet -#' @name parquetFile -#' @export -#' @method parquetFile default -#' @note parquetFile since 1.4.0 -parquetFile.default <- function(...) { - .Deprecated("read.parquet") - read.parquet(unlist(list(...))) -} - -parquetFile <- function(x, ...) { - dispatchFunc("parquetFile(...)", x, ...) -} - #' Create a SparkDataFrame from a text file. #' #' Loads text files and returns a SparkDataFrame whose schema starts with @@ -451,9 +337,8 @@ parquetFile <- function(x, ...) { #' df <- read.text(path) #' } #' @name read.text -#' @method read.text default #' @note read.text since 1.6.1 -read.text.default <- function(path) { +read.text <- function(path) { sparkSession <- getSparkSession() # Allow the user to have a more flexible definiton of the text file path paths <- as.list(suppressWarnings(normalizePath(path))) @@ -462,10 +347,6 @@ read.text.default <- function(path) { dataFrame(sdf) } -read.text <- function(x, ...) { - dispatchFunc("read.text(path)", x, ...) -} - #' SQL Query #' #' Executes a SQL query using Spark, returning the result as a SparkDataFrame. @@ -483,18 +364,13 @@ read.text <- function(x, ...) { #' new_df <- sql("SELECT * FROM table") #' } #' @name sql -#' @method sql default #' @note sql since 1.4.0 -sql.default <- function(sqlQuery) { +sql <- function(sqlQuery) { sparkSession <- getSparkSession() sdf <- callJMethod(sparkSession, "sql", sqlQuery) dataFrame(sdf) } -sql <- function(x, ...) { - dispatchFunc("sql(sqlQuery)", x, ...) -} - #' Create a SparkDataFrame from a SparkSQL Table #' #' Returns the specified Table as a SparkDataFrame. The Table must have already been registered @@ -534,18 +410,13 @@ tableToDF <- function(tableName) { #' tables("hive") #' } #' @name tables -#' @method tables default #' @note tables since 1.4.0 -tables.default <- function(databaseName = NULL) { +tables <- function(databaseName = NULL) { sparkSession <- getSparkSession() jdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getTables", sparkSession, databaseName) dataFrame(jdf) } -tables <- function(x, ...) { - dispatchFunc("tables(databaseName = NULL)", x, ...) -} - #' Table Names #' #' Returns the names of tables in the given database as an array. @@ -560,9 +431,8 @@ tables <- function(x, ...) { #' tableNames("hive") #' } #' @name tableNames -#' @method tableNames default #' @note tableNames since 1.4.0 -tableNames.default <- function(databaseName = NULL) { +tableNames <- function(databaseName = NULL) { sparkSession <- getSparkSession() callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getTableNames", @@ -570,10 +440,6 @@ tableNames.default <- function(databaseName = NULL) { databaseName) } -tableNames <- function(x, ...) { - dispatchFunc("tableNames(databaseName = NULL)", x, ...) -} - #' Cache Table #' #' Caches the specified table in-memory. @@ -591,18 +457,13 @@ tableNames <- function(x, ...) { #' cacheTable("table") #' } #' @name cacheTable -#' @method cacheTable default #' @note cacheTable since 1.4.0 -cacheTable.default <- function(tableName) { +cacheTable <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") callJMethod(catalog, "cacheTable", tableName) } -cacheTable <- function(x, ...) { - dispatchFunc("cacheTable(tableName)", x, ...) -} - #' Uncache Table #' #' Removes the specified table from the in-memory cache. @@ -620,18 +481,13 @@ cacheTable <- function(x, ...) { #' uncacheTable("table") #' } #' @name uncacheTable -#' @method uncacheTable default #' @note uncacheTable since 1.4.0 -uncacheTable.default <- function(tableName) { +uncacheTable <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") callJMethod(catalog, "uncacheTable", tableName) } -uncacheTable <- function(x, ...) { - dispatchFunc("uncacheTable(tableName)", x, ...) -} - #' Clear Cache #' #' Removes all cached tables from the in-memory cache. @@ -643,18 +499,13 @@ uncacheTable <- function(x, ...) { #' clearCache() #' } #' @name clearCache -#' @method clearCache default #' @note clearCache since 1.4.0 -clearCache.default <- function() { +clearCache <- function() { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") callJMethod(catalog, "clearCache") } -clearCache <- function() { - dispatchFunc("clearCache()") -} - #' (Deprecated) Drop Temporary Table #' #' Drops the temporary table with the given table name in the catalog. @@ -672,20 +523,14 @@ clearCache <- function() { #' dropTempTable("table") #' } #' @name dropTempTable -#' @method dropTempTable default #' @note dropTempTable since 1.4.0 -dropTempTable.default <- function(tableName) { +dropTempTable <- function(tableName) { if (class(tableName) != "character") { stop("tableName must be a string.") } dropTempView(tableName) } -dropTempTable <- function(x, ...) { - .Deprecated("dropTempView") - dispatchFunc("dropTempView(viewName)", x, ...) -} - #' Drops the temporary view with the given view name in the catalog. #' #' Drops the temporary view with the given view name in the catalog. @@ -741,9 +586,8 @@ dropTempView <- function(viewName) { #' df3 <- loadDF("data/test_table", "parquet", mergeSchema = "true") #' } #' @name read.df -#' @method read.df default #' @note read.df since 1.4.0 -read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) { +read.df <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) { sparkSession <- getSparkSession() options <- varargsToEnv(...) if (!is.null(path)) { @@ -766,22 +610,13 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string dataFrame(sdf) } -read.df <- function(x, ...) { - dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", x, ...) -} - #' @rdname read.df #' @name loadDF -#' @method loadDF default #' @note loadDF since 1.6.0 -loadDF.default <- function(path = NULL, source = NULL, schema = NULL, ...) { +loadDF <- function(path = NULL, source = NULL, schema = NULL, ...) { read.df(path, source, schema, ...) } -loadDF <- function(x, ...) { - dispatchFunc("loadDF(path = NULL, source = NULL, schema = NULL, ...)", x, ...) -} - #' Create an external table #' #' Creates an external table based on the dataset in a data source, @@ -803,9 +638,8 @@ loadDF <- function(x, ...) { #' df <- createExternalTable("myjson", path="path/to/json", source="json") #' } #' @name createExternalTable -#' @method createExternalTable default #' @note createExternalTable since 1.4.0 -createExternalTable.default <- function(tableName, path = NULL, source = NULL, ...) { +createExternalTable <- function(tableName, path = NULL, source = NULL, ...) { sparkSession <- getSparkSession() options <- varargsToEnv(...) if (!is.null(path)) { @@ -816,10 +650,6 @@ createExternalTable.default <- function(tableName, path = NULL, source = NULL, . dataFrame(sdf) } -createExternalTable <- function(x, ...) { - dispatchFunc("createExternalTable(tableName, path = NULL, source = NULL, ...)", x, ...) -} - #' Create a SparkDataFrame representing the database table accessible via JDBC URL #' #' Additional JDBC database connection properties can be set (...) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index ff5297ffd51cb..de9d7661ccabe 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -90,50 +90,6 @@ sparkR.session.stop <- function() { clearJobjs() } -#' (Deprecated) Initialize a new Spark Context -#' -#' This function initializes a new SparkContext. -#' -#' @param master The Spark master URL -#' @param appName Application name to register with cluster manager -#' @param sparkHome Spark Home directory -#' @param sparkEnvir Named list of environment variables to set on worker nodes -#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors -#' @param sparkJars Character vector of jar files to pass to the worker nodes -#' @param sparkPackages Character vector of packages from spark-packages.org -#' @seealso \link{sparkR.session} -#' @rdname sparkR.init-deprecated -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark") -#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark", -#' list(spark.executor.memory="1g")) -#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="4g"), -#' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"), -#' c("one.jar", "two.jar", "three.jar"), -#' c("com.databricks:spark-avro_2.10:2.0.1")) -#'} -#' @note sparkR.init since 1.4.0 -sparkR.init <- function( - master = "", - appName = "SparkR", - sparkHome = Sys.getenv("SPARK_HOME"), - sparkEnvir = list(), - sparkExecutorEnv = list(), - sparkJars = "", - sparkPackages = "") { - .Deprecated("sparkR.session") - sparkR.sparkContext(master, - appName, - sparkHome, - convertNamedListToEnv(sparkEnvir), - convertNamedListToEnv(sparkExecutorEnv), - sparkJars, - sparkPackages) -} - # Internal function to handle creating the SparkContext. sparkR.sparkContext <- function( master = "", @@ -255,63 +211,6 @@ sparkR.sparkContext <- function( sc } -#' (Deprecated) Initialize a new SQLContext -#' -#' This function creates a SparkContext from an existing JavaSparkContext and -#' then uses it to initialize a new SQLContext -#' -#' Starting SparkR 2.0, a SparkSession is initialized and returned instead. -#' This API is deprecated and kept for backward compatibility only. -#' -#' @param jsc The existing JavaSparkContext created with SparkR.init() -#' @seealso \link{sparkR.session} -#' @rdname sparkRSQL.init-deprecated -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) -#'} -#' @note sparkRSQL.init since 1.4.0 -sparkRSQL.init <- function(jsc = NULL) { - .Deprecated("sparkR.session") - - if (exists(".sparkRsession", envir = .sparkREnv)) { - return(get(".sparkRsession", envir = .sparkREnv)) - } - - # Default to without Hive support for backward compatibility. - sparkR.session(enableHiveSupport = FALSE) -} - -#' (Deprecated) Initialize a new HiveContext -#' -#' This function creates a HiveContext from an existing JavaSparkContext -#' -#' Starting SparkR 2.0, a SparkSession is initialized and returned instead. -#' This API is deprecated and kept for backward compatibility only. -#' -#' @param jsc The existing JavaSparkContext created with SparkR.init() -#' @seealso \link{sparkR.session} -#' @rdname sparkRHive.init-deprecated -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRHive.init(sc) -#'} -#' @note sparkRHive.init since 1.4.0 -sparkRHive.init <- function(jsc = NULL) { - .Deprecated("sparkR.session") - - if (exists(".sparkRsession", envir = .sparkREnv)) { - return(get(".sparkRsession", envir = .sparkREnv)) - } - - # Default to without Hive support for backward compatibility. - sparkR.session(enableHiveSupport = TRUE) -} - #' Get the existing SparkSession or initialize a new SparkSession. #' #' Additional Spark properties can be set (...), and these named parameters take priority over @@ -407,26 +306,11 @@ sparkR.session <- function( #' setJobGroup("myJobGroup", "My job group description", TRUE) #'} #' @note setJobGroup since 1.5.0 -#' @method setJobGroup default -setJobGroup.default <- function(groupId, description, interruptOnCancel) { +setJobGroup <- function(groupId, description, interruptOnCancel) { sc <- getSparkContext() callJMethod(sc, "setJobGroup", groupId, description, interruptOnCancel) } -setJobGroup <- function(sc, groupId, description, interruptOnCancel) { - if (class(sc) == "jobj" && any(grepl("JavaSparkContext", getClassName.jobj(sc)))) { - .Deprecated("setJobGroup(groupId, description, interruptOnCancel)", - old = "setJobGroup(sc, groupId, description, interruptOnCancel)") - setJobGroup.default(groupId, description, interruptOnCancel) - } else { - # Parameter order is shifted - groupIdToUse <- sc - descriptionToUse <- groupId - interruptOnCancelToUse <- description - setJobGroup.default(groupIdToUse, descriptionToUse, interruptOnCancelToUse) - } -} - #' Clear current job group ID and its description #' #' @rdname clearJobGroup @@ -437,22 +321,11 @@ setJobGroup <- function(sc, groupId, description, interruptOnCancel) { #' clearJobGroup() #'} #' @note clearJobGroup since 1.5.0 -#' @method clearJobGroup default -clearJobGroup.default <- function() { +clearJobGroup <- function() { sc <- getSparkContext() callJMethod(sc, "clearJobGroup") } -clearJobGroup <- function(sc) { - if (!missing(sc) && - class(sc) == "jobj" && - any(grepl("JavaSparkContext", getClassName.jobj(sc)))) { - .Deprecated("clearJobGroup()", old = "clearJobGroup(sc)") - } - clearJobGroup.default() -} - - #' Cancel active jobs for the specified group #' #' @param groupId the ID of job group to be cancelled @@ -464,23 +337,11 @@ clearJobGroup <- function(sc) { #' cancelJobGroup("myJobGroup") #'} #' @note cancelJobGroup since 1.5.0 -#' @method cancelJobGroup default -cancelJobGroup.default <- function(groupId) { +cancelJobGroup <- function(groupId) { sc <- getSparkContext() callJMethod(sc, "cancelJobGroup", groupId) } -cancelJobGroup <- function(sc, groupId) { - if (class(sc) == "jobj" && any(grepl("JavaSparkContext", getClassName.jobj(sc)))) { - .Deprecated("cancelJobGroup(groupId)", old = "cancelJobGroup(sc, groupId)") - cancelJobGroup.default(groupId) - } else { - # Parameter order is shifted - groupIdToUse <- sc - cancelJobGroup.default(groupIdToUse) - } -} - sparkConfToSubmitOps <- new.env() sparkConfToSubmitOps[["spark.driver.memory"]] <- "--driver-memory" sparkConfToSubmitOps[["spark.driver.extraClassPath"]] <- "--driver-class-path" From 503c93560651d8479caffb48b56415405ba3869c Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 23 Jul 2016 14:34:00 -0700 Subject: [PATCH 2/4] fix namespace file --- R/pkg/NAMESPACE | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1d74c6d95578f..dd1e32f1e7bc3 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -7,15 +7,11 @@ importFrom(methods, setGeneric, setMethod, setOldClass) # S3 methods exported export("sparkR.session") -export("sparkR.init") -export("sparkR.stop") export("sparkR.session.stop") +export("sparkR.stop") export("sparkR.conf") export("print.jobj") -export("sparkRSQL.init", - "sparkRHive.init") - # MLlib integration exportMethods("glm", "spark.glm", @@ -307,9 +303,7 @@ export("as.DataFrame", "createExternalTable", "dropTempTable", "dropTempView", - "jsonFile", "loadDF", - "parquetFile", "read.df", "read.jdbc", "read.json", From 31622151eae38644ccd22051872797e5632bf5c8 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 23 Jul 2016 15:14:27 -0700 Subject: [PATCH 3/4] fix test --- R/pkg/inst/tests/testthat/test_context.R | 13 ---- R/pkg/inst/tests/testthat/test_sparkSQL.R | 72 ++--------------------- 2 files changed, 4 insertions(+), 81 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 8bd134a58d68e..b7184f5dbfbce 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -54,15 +54,6 @@ test_that("Check masked functions", { sort(namesOfMaskedCompletely, na.last = TRUE)) }) -test_that("repeatedly starting and stopping SparkR", { - for (i in 1:4) { - sc <- suppressWarnings(sparkR.init()) - rdd <- parallelize(sc, 1:20, 2L) - expect_equal(count(rdd), 20) - suppressWarnings(sparkR.stop()) - } -}) - test_that("repeatedly starting and stopping SparkSession", { for (i in 1:4) { sparkR.session(enableHiveSupport = FALSE) @@ -100,10 +91,6 @@ test_that("job group functions can be called", { setJobGroup("groupId", "job description", TRUE) cancelJobGroup("groupId") clearJobGroup() - - suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE)) - suppressWarnings(cancelJobGroup(sc, "groupId")) - suppressWarnings(clearJobGroup(sc)) sparkR.session.stop() }) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3f3cb766b38f1..984e44407629a 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -88,15 +88,6 @@ mockLinesComplexType <- complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesComplexType, complexTypeJsonPath) -test_that("calling sparkRSQL.init returns existing SQL context", { - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) - expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext) -}) - -test_that("calling sparkRSQL.init returns existing SparkSession", { - expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession) -}) - test_that("calling sparkR.session returns existing SparkSession", { expect_equal(sparkR.session(), sparkSession) }) @@ -474,33 +465,15 @@ test_that("read/write json files", { jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json") write.json(df, jsonPath3) - # Test read.json()/jsonFile() works with multiple input paths + # Test read.json() works with multiple input paths jsonDF1 <- read.json(c(jsonPath2, jsonPath3)) expect_is(jsonDF1, "SparkDataFrame") expect_equal(count(jsonDF1), 6) - # Suppress warnings because jsonFile is deprecated - jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3))) - expect_is(jsonDF2, "SparkDataFrame") - expect_equal(count(jsonDF2), 6) unlink(jsonPath2) unlink(jsonPath3) }) -test_that("jsonRDD() on a RDD with json string", { - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) - rdd <- parallelize(sc, mockLines) - expect_equal(count(rdd), 3) - df <- suppressWarnings(jsonRDD(sqlContext, rdd)) - expect_is(df, "SparkDataFrame") - expect_equal(count(df), 3) - - rdd2 <- flatMap(rdd, function(x) c(x, x)) - df <- suppressWarnings(jsonRDD(sqlContext, rdd2)) - expect_is(df, "SparkDataFrame") - expect_equal(count(df), 6) -}) - test_that("test tableNames and tables", { df <- read.json(jsonPath) createOrReplaceTempView(df, "table1") @@ -508,10 +481,10 @@ test_that("test tableNames and tables", { tables <- tables() expect_equal(count(tables), 1) - suppressWarnings(registerTempTable(df, "table2")) + createOrReplaceTempView(df, "table2") tables <- tables() expect_equal(count(tables), 2) - suppressWarnings(dropTempTable("table1")) + dropTempView("table1") dropTempView("table2") tables <- tables() @@ -1650,7 +1623,6 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", { expect_is(unioned, "SparkDataFrame") expect_equal(count(unioned), 6) expect_equal(first(unioned)$name, "Michael") - expect_equal(count(arrange(suppressWarnings(unionAll(df, df2)), df$age)), 6) unioned2 <- arrange(rbind(unioned, df, df2), df$age) expect_is(unioned2, "SparkDataFrame") @@ -1777,13 +1749,10 @@ test_that("read/write Parquet files", { parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") write.parquet(df, parquetPath2) parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet") - suppressWarnings(saveAsParquetFile(df, parquetPath3)) + write.parquet(df, parquetPath3) parquetDF <- read.parquet(c(parquetPath2, parquetPath3)) expect_is(parquetDF, "SparkDataFrame") expect_equal(count(parquetDF), count(df) * 2) - parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3)) - expect_is(parquetDF2, "SparkDataFrame") - expect_equal(count(parquetDF2), count(df) * 2) # Test if varargs works with variables saveMode <- "overwrite" @@ -2400,39 +2369,6 @@ test_that("Window functions on a DataFrame", { expect_equal(result, expected) }) -test_that("createDataFrame sqlContext parameter backward compatibility", { - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) - a <- 1:3 - b <- c("a", "b", "c") - ldf <- data.frame(a, b) - # Call function with namespace :: operator - SPARK-16538 - df <- suppressWarnings(SparkR::createDataFrame(sqlContext, ldf)) - expect_equal(columns(df), c("a", "b")) - expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) - expect_equal(count(df), 3) - ldf2 <- collect(df) - expect_equal(ldf$a, ldf2$a) - - df2 <- suppressWarnings(createDataFrame(sqlContext, iris)) - expect_equal(count(df2), 150) - expect_equal(ncol(df2), 5) - - df3 <- suppressWarnings(read.df(sqlContext, jsonPath, "json")) - expect_is(df3, "SparkDataFrame") - expect_equal(count(df3), 3) - - before <- suppressWarnings(createDataFrame(sqlContext, iris)) - after <- suppressWarnings(createDataFrame(iris)) - expect_equal(collect(before), collect(after)) - - # more tests for SPARK-16538 - createOrReplaceTempView(df, "table") - SparkR::tables() - SparkR::sql("SELECT 1") - suppressWarnings(SparkR::sql(sqlContext, "SELECT * FROM table")) - suppressWarnings(SparkR::dropTempTable(sqlContext, "table")) -}) - test_that("randomSplit", { num <- 4000 df <- createDataFrame(data.frame(id = 1:num)) From 9cc22601569c85928505164d47a0d01371244f8d Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 23 Jul 2016 15:39:19 -0700 Subject: [PATCH 4/4] missed registerTempTable --- R/pkg/NAMESPACE | 1 - R/pkg/R/generics.R | 4 ---- 2 files changed, 5 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index dd1e32f1e7bc3..01c18cbafe3c4 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -86,7 +86,6 @@ exportMethods("arrange", "printSchema", "randomSplit", "rbind", - "registerTempTable", "rename", "repartition", "sample", diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index e7444ac2467d8..7329e44b4c2ca 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -563,10 +563,6 @@ setGeneric("printSchema", function(x) { standardGeneric("printSchema") }) #' @export setGeneric("rename", function(x, ...) { standardGeneric("rename") }) -#' @rdname registerTempTable-deprecated -#' @export -setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) - #' @rdname sample #' @export setGeneric("sample",