
Commit 233dc12

huaxingao authored and HyukjinKwon committed
[SPARK-31290][R] Add back the deprecated R APIs
### What changes were proposed in this pull request?

Add back the deprecated R APIs removed by #22843 and #22815. These APIs are:

- `sparkR.init`
- `sparkRSQL.init`
- `sparkRHive.init`
- `registerTempTable`
- `createExternalTable`
- `dropTempTable`

There is no need to port a dispatch wrapper such as

```r
createExternalTable <- function(x, ...) {
  dispatchFunc("createExternalTable(tableName, path = NULL, source = NULL, ...)", x, ...)
}
```

because that wrapper existed only for backward compatibility while SQLContext was still in use (it appears to date from #9192), and it has not been needed since SparkR replaced SQLContext with SparkSession in #13635.

### Why are the changes needed?

To follow Spark's amended semantic versioning policy.

### Does this PR introduce any user-facing change?

Yes. The removed R APIs are put back.

### How was this patch tested?

Added back the removed tests.

Closes #28058 from huaxingao/r.

Authored-by: Huaxin Gao <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit fd0b228)
Signed-off-by: HyukjinKwon <[email protected]>
1 parent 01b26c4 commit 233dc12
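Every API restored here follows the same shim pattern: emit a deprecation warning via `.Deprecated()` pointing at the replacement, then delegate to that replacement. For example, the restored `dropTempTable` (shown in full in the `catalog.R` diff below) reduces to:

```r
# Shape of every shim restored by this commit: warn, then delegate.
dropTempTable <- function(tableName) {
  .Deprecated("dropTempView", old = "dropTempTable")  # deprecation warning
  if (class(tableName) != "character") {
    stop("tableName must be a string.")
  }
  dropTempView(tableName)  # forward to the replacement API
}
```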

File tree

7 files changed (+200, -4 lines)

R/pkg/NAMESPACE

Lines changed: 7 additions & 0 deletions
```diff
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u

 # S3 methods exported
 export("sparkR.session")
+export("sparkR.init")
 export("sparkR.session.stop")
 export("sparkR.stop")
 export("sparkR.conf")
@@ -41,6 +42,9 @@ export("sparkR.callJStatic")

 export("install.spark")

+export("sparkRSQL.init",
+       "sparkRHive.init")
+
 # MLlib integration
 exportMethods("glm",
               "spark.glm",
@@ -148,6 +152,7 @@ exportMethods("arrange",
               "printSchema",
               "randomSplit",
               "rbind",
+              "registerTempTable",
               "rename",
               "repartition",
               "repartitionByRange",
@@ -420,8 +425,10 @@ export("as.DataFrame",
        "cacheTable",
        "clearCache",
        "createDataFrame",
+       "createExternalTable",
        "createTable",
        "currentDatabase",
+       "dropTempTable",
        "dropTempView",
        "listColumns",
        "listDatabases",
```

R/pkg/R/DataFrame.R

Lines changed: 26 additions & 0 deletions
```diff
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
             invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
           })

+#' (Deprecated) Register Temporary Table
+#'
+#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
+#' @param x A SparkDataFrame
+#' @param tableName A character vector containing the name of the table
+#'
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @aliases registerTempTable,SparkDataFrame,character-method
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+#' @note registerTempTable since 1.4.0
+setMethod("registerTempTable",
+          signature(x = "SparkDataFrame", tableName = "character"),
+          function(x, tableName) {
+            .Deprecated("createOrReplaceTempView")
+            invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
+          })
+
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
```
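The restored method is a thin shim over `createOrReplaceTempView`, so both spellings register the same temporary view. A minimal sketch, assuming an active SparkSession and a JSON file at the documented example path:

```r
# Both calls register the same temporary view; the deprecated one also warns.
library(SparkR)
sparkR.session()

df <- read.json("path/to/file.json")

registerTempTable(df, "json_df")        # deprecated: warns, then delegates
createOrReplaceTempView(df, "json_df")  # the replacement API

head(sql("SELECT * FROM json_df"))
```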

R/pkg/R/catalog.R

Lines changed: 54 additions & 0 deletions
```diff
@@ -17,6 +17,35 @@

 # catalog.R: SparkSession catalog functions

+#' (Deprecated) Create an external table
+#'
+#' Creates an external table based on the dataset in a data source,
+#' Returns a SparkDataFrame associated with the external table.
+#'
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
+#' "spark.sql.sources.default" will be used.
+#'
+#' @param tableName a name of the table.
+#' @param path the path of files to load.
+#' @param source the name of external data source.
+#' @param schema the schema of the data required for some data sources.
+#' @param ... additional argument(s) passed to the method.
+#' @return A SparkDataFrame.
+#' @rdname createExternalTable-deprecated
+#' @seealso \link{createTable}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
+#' }
+#' @name createExternalTable
+#' @note createExternalTable since 1.4.0
+createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
+  .Deprecated("createTable", old = "createExternalTable")
+  createTable(tableName, path, source, schema, ...)
+}
+
 #' Creates a table based on the dataset in a data source
 #'
 #' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
   invisible(callJMethod(catalog, "clearCache"))
 }

+#' (Deprecated) Drop Temporary Table
+#'
+#' Drops the temporary table with the given table name in the catalog.
+#' If the table has been cached/persisted before, it's also unpersisted.
+#'
+#' @param tableName The name of the SparkSQL table to be dropped.
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempTable("table")
+#' }
+#' @name dropTempTable
+#' @note dropTempTable since 1.4.0
+dropTempTable <- function(tableName) {
+  .Deprecated("dropTempView", old = "dropTempTable")
+  if (class(tableName) != "character") {
+    stop("tableName must be a string.")
+  }
+  dropTempView(tableName)
+}
+
 #' Drops the temporary view with the given view name in the catalog.
 #'
 #' Drops the temporary view with the given view name in the catalog.
```
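Both shims forward to their catalog replacements, so migrating is a one-line rename. A minimal sketch, assuming a SparkSession and a JSON dataset at a hypothetical path:

```r
# Deprecated spelling: warns, then delegates to createTable().
df <- suppressWarnings(
  createExternalTable("myjson", path = "path/to/json", source = "json")
)
df2 <- createTable("myjson2", path = "path/to/json", source = "json")  # replacement

# dropTempTable() validates its argument, then calls dropTempView().
createOrReplaceTempView(df, "tmp")
suppressWarnings(dropTempTable("tmp"))  # same effect as dropTempView("tmp")
```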

R/pkg/R/generics.R

Lines changed: 3 additions & 0 deletions
```diff
@@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
 #' @rdname printSchema
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })

+#' @rdname registerTempTable-deprecated
+setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
+
 #' @rdname rename
 setGeneric("rename", function(x, ...) { standardGeneric("rename") })
```
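The generic must exist before `DataFrame.R` can attach its S4 method to it; without this `setGeneric`, the `setMethod("registerTempTable", ...)` call above would fail at package load. A toy illustration of the pattern (hypothetical class and generic, not SparkR code):

```r
# 1. Declare the generic: defines the signature that methods dispatch on.
setClass("Toy", slots = c(name = "character"))
setGeneric("describe2", function(x, label) { standardGeneric("describe2") })

# 2. Attach a method for a specific class signature.
setMethod("describe2",
          signature(x = "Toy", label = "character"),
          function(x, label) {
            paste(label, x@name)
          })

describe2(new("Toy", name = "spark"), "hello")  # "hello spark"
```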

R/pkg/R/sparkR.R

Lines changed: 98 additions & 0 deletions
```diff
@@ -88,6 +88,49 @@ sparkR.stop <- function() {
   sparkR.session.stop()
 }

+#' (Deprecated) Initialize a new Spark Context
+#'
+#' This function initializes a new SparkContext.
+#'
+#' @param master The Spark master URL
+#' @param appName Application name to register with cluster manager
+#' @param sparkHome Spark Home directory
+#' @param sparkEnvir Named list of environment variables to set on worker nodes
+#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
+#' @param sparkJars Character vector of jar files to pass to the worker nodes
+#' @param sparkPackages Character vector of package coordinates
+#' @seealso \link{sparkR.session}
+#' @rdname sparkR.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
+#'                  list(spark.executor.memory="1g"))
+#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
+#'                  list(spark.executor.memory="4g"),
+#'                  list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
+#'                  c("one.jar", "two.jar", "three.jar"),
+#'                  c("com.databricks:spark-avro_2.11:2.0.1"))
+#'}
+#' @note sparkR.init since 1.4.0
+sparkR.init <- function(
+  master = "",
+  appName = "SparkR",
+  sparkHome = Sys.getenv("SPARK_HOME"),
+  sparkEnvir = list(),
+  sparkExecutorEnv = list(),
+  sparkJars = "",
+  sparkPackages = "") {
+  .Deprecated("sparkR.session")
+  sparkR.sparkContext(master,
+     appName,
+     sparkHome,
+     convertNamedListToEnv(sparkEnvir),
+     convertNamedListToEnv(sparkExecutorEnv),
+     sparkJars,
+     sparkPackages)
+}
+
 # Internal function to handle creating the SparkContext.
 sparkR.sparkContext <- function(
   master = "",
@@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
   sc
 }

+#' (Deprecated) Initialize a new SQLContext
+#'
+#' This function creates a SparkContext from an existing JavaSparkContext and
+#' then uses it to initialize a new SQLContext
+#'
+#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with SparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRSQL.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#'}
+#' @note sparkRSQL.init since 1.4.0
+sparkRSQL.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Default to without Hive support for backward compatibility.
+  sparkR.session(enableHiveSupport = FALSE)
+}
+
+#' (Deprecated) Initialize a new HiveContext
+#'
+#' This function creates a HiveContext from an existing JavaSparkContext
+#'
+#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with SparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRHive.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRHive.init(sc)
+#'}
+#' @note sparkRHive.init since 1.4.0
+sparkRHive.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Default to with Hive support for backward compatibility.
+  sparkR.session(enableHiveSupport = TRUE)
+}
+
 #' Get the existing SparkSession or initialize a new SparkSession.
 #'
 #' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
```
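All three entry points now warn and fall through to `sparkR.session`, which also returns the existing session when one is already running. A minimal before/after sketch, assuming a local Spark installation on `SPARK_HOME`:

```r
# Deprecated initialization sequence: still works, but each call warns.
sc <- suppressWarnings(sparkR.init(master = "local[2]", appName = "SparkR"))
sqlContext <- suppressWarnings(sparkRSQL.init(sc))

# Equivalent single call: creates the SparkSession, or returns the existing one.
spark <- sparkR.session(master = "local[2]", appName = "SparkR")
```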

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 11 additions & 2 deletions
```diff
@@ -106,6 +106,15 @@ if (is_windows()) {
   Sys.setenv(TZ = "GMT")
 }

+test_that("calling sparkRSQL.init returns existing SQL context", {
+  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
+  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
+})
+
+test_that("calling sparkRSQL.init returns existing SparkSession", {
+  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession)
+})
+
 test_that("calling sparkR.session returns existing SparkSession", {
   expect_equal(sparkR.session(), sparkSession)
 })
@@ -656,10 +665,10 @@ test_that("test tableNames and tables", {
   expect_true("tableName" %in% colnames(tables()))
   expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))

-  createOrReplaceTempView(df, "table2")
+  suppressWarnings(registerTempTable(df, "table2"))
   tables <- listTables()
   expect_equal(count(tables), count + 2)
-  dropTempView("table1")
+  suppressWarnings(dropTempTable("table1"))
   expect_true(dropTempView("table2"))

   tables <- listTables()
```
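The reworked assertions deliberately route through the deprecated names, exercising the shims end to end; `suppressWarnings` keeps the deprecation messages from failing the run. A hypothetical extra check (not part of this patch) could assert the warning itself with testthat's `expect_warning`:

```r
# Hypothetical additional test: assert that the shim emits the warning.
test_that("registerTempTable warns about deprecation", {
  df <- createDataFrame(data.frame(a = 1:3))
  expect_warning(registerTempTable(df, "warn_check"), "deprecated")
  expect_true(dropTempView("warn_check"))
})
```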

docs/sparkr-migration-guide.md

Lines changed: 1 addition & 2 deletions
```diff
@@ -28,8 +28,7 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.

 ## Upgrading from SparkR 2.4 to 3.0

-- The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead.
-- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead.
+- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, `read.json` instead.

 ## Upgrading from SparkR 2.3 to 2.4

```
