Skip to content

Commit fa6f078

Browse files
committed
SparkR spark.addFile supports adding directory recursively.
1 parent 79159a1 commit fa6f078

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

R/pkg/R/context.R

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -231,17 +231,21 @@ setCheckpointDir <- function(sc, dirName) {
231231
#' filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
232232
#' use spark.getSparkFiles(fileName) to find its download location.
233233
#'
234+
#' A directory can be given if the recursive option is set to TRUE.
235+
#' Currently directories are only supported for Hadoop-supported filesystems.
236+
#'
234237
#' @rdname spark.addFile
235238
#' @param path The path of the file to be added
239+
#' @param recursive Whether to add files recursively if the path is a directory. Default is FALSE.
236240
#' @export
237241
#' @examples
238242
#'\dontrun{
239243
#' spark.addFile("~/myfile")
240244
#'}
241245
#' @note spark.addFile since 2.1.0
242-
spark.addFile <- function(path) {
246+
spark.addFile <- function(path, recursive = FALSE) {
243247
sc <- getSparkContext()
244-
invisible(callJMethod(sc, "addFile", suppressWarnings(normalizePath(path))))
248+
invisible(callJMethod(sc, "addFile", suppressWarnings(normalizePath(path)), recursive))
245249
}
246250

247251
#' Get the root directory that contains files added through spark.addFile.

R/pkg/inst/tests/testthat/test_context.R

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,7 @@ test_that("spark.lapply should perform simple transforms", {
169169

170170
test_that("add and get file to be downloaded with Spark job on every node", {
171171
sparkR.sparkContext()
172+
# Test add file.
172173
path <- tempfile(pattern = "hello", fileext = ".txt")
173174
filename <- basename(path)
174175
words <- "Hello World!"
@@ -177,5 +178,25 @@ test_that("add and get file to be downloaded with Spark job on every node", {
177178
download_path <- spark.getSparkFiles(filename)
178179
expect_equal(readLines(download_path), words)
179180
unlink(path)
181+
182+
# Test add directory recursively.
183+
path <- tempdir()
184+
dir_name <- basename(path)
185+
path1 <- paste0(path, "/hello.txt")
186+
file.create(path1)
187+
sub_path <- paste0(path, "/sub_hello")
188+
dir.create(sub_path)
189+
path2 <- paste0(sub_path, "/sub_hello.txt")
190+
file.create(path2)
191+
words <- "Hello World!"
192+
sub_words <- "Sub Hello World!"
193+
writeLines(words, path1)
194+
writeLines(sub_words, path2)
195+
spark.addFile(path, recursive = TRUE)
196+
download_path1 = spark.getSparkFiles(paste0(dir_name, "/hello.txt"))
197+
expect_equal(readLines(download_path1), words)
198+
download_path2 = spark.getSparkFiles(paste0(dir_name, "/sub_hello/sub_hello.txt"))
199+
expect_equal(readLines(download_path2), sub_words)
200+
unlink(path, recursive = TRUE)
180201
sparkR.session.stop()
181202
})

0 commit comments

Comments
 (0)