Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ export("as.DataFrame",
"loadDF",
"parquetFile",
"read.df",
"read.parquet",
"sql",
"table",
"tableNames",
Expand Down
16 changes: 14 additions & 2 deletions R/pkg/R/SQLContext.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,18 +256,30 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
}
}


#' Create a DataFrame from a Parquet file.
#'
#' Reads one or more Parquet files and returns their contents as a DataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param ... Path(s) of parquet file(s) to read.
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @return DataFrame
#' @rdname read.parquet
#' @name read.parquet
#' @export
read.parquet <- function(sqlContext, path) {
  # Normalize every supplied path so callers have more freedom in how they
  # spell the parquet file location; normalizePath() warnings are suppressed.
  normalized <- suppressWarnings(normalizePath(path))
  reader <- callJMethod(sqlContext, "read")
  # The JVM-side "parquet" method is handed the paths as an R list.
  dataFrame(callJMethod(reader, "parquet", as.list(normalized)))
}

#' @rdname read.parquet
#' @name parquetFile
#' @export
# TODO: Implement saveasParquetFile and write examples for both
parquetFile <- function(sqlContext, ...) {
.Deprecated("read.parquet")
# Allow the user to have a more flexible definiton of the text file path
paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
sdf <- callJMethod(sqlContext, "parquetFile", paths)
Expand Down
11 changes: 7 additions & 4 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1420,22 +1420,25 @@ test_that("mutate(), transform(), rename() and names()", {
detach(airquality)
})

# Round-trip check: a DataFrame loaded from JSON is written out as parquet,
# read back, and compared with the source by class and by row count.
# NOTE(review): the two test_that() headers and the two parquetDF assignments
# below look like the old and new versions of the same lines (diff rendering
# without +/- markers) -- confirm which of each pair is current.
test_that("write.df() on DataFrame and works with parquetFile", {
test_that("write.df() on DataFrame and works with read.parquet", {
df <- jsonFile(sqlContext, jsonPath)
write.df(df, parquetPath, "parquet", mode="overwrite")
parquetDF <- parquetFile(sqlContext, parquetPath)
parquetDF <- read.parquet(sqlContext, parquetPath)
expect_is(parquetDF, "DataFrame")
# The parquet copy must hold exactly as many rows as the JSON-backed source.
expect_equal(count(df), count(parquetDF))
})

test_that("parquetFile works with multiple input paths", {
test_that("read.parquet()/parquetFile() works with multiple input paths", {
df <- jsonFile(sqlContext, jsonPath)
write.df(df, parquetPath, "parquet", mode="overwrite")
parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
write.df(df, parquetPath2, "parquet", mode="overwrite")
parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2)
parquetDF <- read.parquet(sqlContext, c(parquetPath, parquetPath2))
expect_is(parquetDF, "DataFrame")
expect_equal(count(parquetDF), count(df) * 2)
parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath, parquetPath2))
expect_is(parquetDF2, "DataFrame")
expect_equal(count(parquetDF2), count(df) * 2)

# Test if varargs works with variables
saveMode <- "overwrite"
Expand Down