Skip to content

Commit cc4d522

Browse files
felixcheung authored and rxin committed
[SPARK-12625][SPARKR][SQL] replace R usage of Spark SQL deprecated API
rxin davies shivaram Took save mode from my PR #10480, and move everything to writer methods. This is related to PR #10559 - [x] it seems jsonRDD() is broken, need to investigate - this is not a public API though; will look into some more tonight. (fixed) Author: felixcheung <[email protected]> Closes #10584 from felixcheung/rremovedeprecated.
1 parent b634901 commit cc4d522

File tree

6 files changed

+38
-31
lines changed

6 files changed

+38
-31
lines changed

R/pkg/R/DataFrame.R

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,10 @@ setMethod("registerTempTable",
458458
setMethod("insertInto",
          signature(x = "DataFrame", tableName = "character"),
          function(x, tableName, overwrite = FALSE) {
            # Insert the contents of this DataFrame into an existing table.
            # Map the logical `overwrite` flag onto a Java SaveMode:
            # TRUE -> "overwrite", FALSE -> "append".
            jmode <- convertToJSaveMode(ifelse(overwrite, "overwrite", "append"))
            # Use the non-deprecated DataFrameWriter chain instead of the
            # removed DataFrame.insertInto(tableName, overwrite) API:
            # df.write().mode(jmode).insertInto(tableName)
            write <- callJMethod(x@sdf, "write")
            write <- callJMethod(write, "mode", jmode)
            callJMethod(write, "insertInto", tableName)
          })
463466

464467
#' Cache
@@ -1948,18 +1951,15 @@ setMethod("write.df",
19481951
source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
19491952
"org.apache.spark.sql.parquet")
19501953
}
1951-
allModes <- c("append", "overwrite", "error", "ignore")
1952-
# nolint start
1953-
if (!(mode %in% allModes)) {
1954-
stop('mode should be one of "append", "overwrite", "error", "ignore"')
1955-
}
1956-
# nolint end
1957-
jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
1954+
jmode <- convertToJSaveMode(mode)
19581955
options <- varargsToEnv(...)
19591956
if (!is.null(path)) {
19601957
options[["path"]] <- path
19611958
}
1962-
callJMethod(df@sdf, "save", source, jmode, options)
1959+
write <- callJMethod(df@sdf, "write")
1960+
write <- callJMethod(write, "format", source)
1961+
write <- callJMethod(write, "mode", jmode)
1962+
write <- callJMethod(write, "save", path)
19631963
})
19641964

19651965
#' @rdname write.df
@@ -2013,15 +2013,14 @@ setMethod("saveAsTable",
20132013
source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
20142014
"org.apache.spark.sql.parquet")
20152015
}
2016-
allModes <- c("append", "overwrite", "error", "ignore")
2017-
# nolint start
2018-
if (!(mode %in% allModes)) {
2019-
stop('mode should be one of "append", "overwrite", "error", "ignore"')
2020-
}
2021-
# nolint end
2022-
jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
2016+
jmode <- convertToJSaveMode(mode)
20232017
options <- varargsToEnv(...)
2024-
callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
2018+
2019+
write <- callJMethod(df@sdf, "write")
2020+
write <- callJMethod(write, "format", source)
2021+
write <- callJMethod(write, "mode", jmode)
2022+
write <- callJMethod(write, "options", options)
2023+
callJMethod(write, "saveAsTable", tableName)
20252024
})
20262025

20272026
#' summary

R/pkg/R/SQLContext.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,12 @@ jsonFile <- function(sqlContext, path) {
256256

257257
# TODO: support schema
258258
jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
259+
.Deprecated("read.json")
259260
rdd <- serializeToString(rdd)
260261
if (is.null(schema)) {
261-
sdf <- callJMethod(sqlContext, "jsonRDD", callJMethod(getJRDD(rdd), "rdd"), samplingRatio)
262+
read <- callJMethod(sqlContext, "read")
263+
# samplingRatio is deprecated
264+
sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
262265
dataFrame(sdf)
263266
} else {
264267
stop("not implemented")
@@ -289,10 +292,7 @@ read.parquet <- function(sqlContext, path) {
289292
# TODO: Implement saveasParquetFile and write examples for both
290293
# Deprecated shim kept for backward compatibility: emit a deprecation
# warning, then forward every supplied path to read.parquet().
parquetFile <- function(sqlContext, ...) {
  .Deprecated("read.parquet")
  paths <- unlist(list(...))
  read.parquet(sqlContext, paths)
}
297297

298298
#' SQL Query

R/pkg/R/column.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ setMethod("cast",
209209
setMethod("%in%",
          signature(x = "Column"),
          function(x, table) {
            # Delegate to Column.isin on the JVM side (the old "in" method
            # was removed); the candidate set is coerced to a Java list.
            matched <- callJMethod(x@jc, "isin", as.list(table))
            column(matched)
          })
215215

R/pkg/R/utils.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,3 +641,12 @@ assignNewEnv <- function(data) {
641641
splitString <- function(input) {
642642
Filter(nzchar, unlist(strsplit(input, ",|\\s")))
643643
}
644+
645+
# Translate an R save-mode string into the corresponding Java SaveMode
# object via SQLUtils. Accepts exactly one of "append", "overwrite",
# "error", or "ignore"; any other value is an error.
convertToJSaveMode <- function(mode) {
  supportedModes <- c("append", "overwrite", "error", "ignore")
  if (!(mode %in% supportedModes)) {
    stop('mode should be one of "append", "overwrite", "error", "ignore"') # nolint
  }
  callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
}

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -423,12 +423,12 @@ test_that("read/write json files", {
423423
test_that("jsonRDD() on a RDD with json string", {
  rdd <- parallelize(sc, mockLines)
  expect_equal(count(rdd), 3)
  # jsonRDD() is deprecated and now emits a warning; suppress it so the
  # test output stays clean while the code path is still exercised.
  df <- suppressWarnings(jsonRDD(sqlContext, rdd))
  expect_is(df, "DataFrame")
  expect_equal(count(df), 3)

  # flatMap duplicates each element, so the resulting DataFrame has 6 rows.
  rdd2 <- flatMap(rdd, function(x) c(x, x))
  df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
  expect_is(df, "DataFrame")
  expect_equal(count(df), 6)
})

dev/run-tests.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -425,13 +425,12 @@ def run_build_tests():
425425

426426

427427
def run_sparkr_tests():
    """Run the SparkR test suite, skipping gracefully if R is not installed."""
    set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")

    if which("R"):
        run_cmd([os.path.join(SPARK_HOME, "R", "run-tests.sh")])
    else:
        # R is optional in the build environment; report and skip rather
        # than failing the whole test run.
        print("Ignoring SparkR tests as R was not found in PATH")
435434

436435

437436
def parse_opts():

0 commit comments

Comments
 (0)