
Commit 5509165

Author: pgandhi
[SPARK-25250]: Upmerging with master branch
2 parents: a73f619 + 5bef4fe

1,392 files changed: +50,514 / -34,450 lines


.gitignore

Lines changed: 0 additions & 1 deletion

@@ -77,7 +77,6 @@ target/
 unit-tests.log
 work/
 docs/.jekyll-metadata
-*.crc
 
 # For Hive
 TempStatsStore/

R/WINDOWS.md

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 To build SparkR on Windows, the following steps are required
 
 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`.
+include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
 
 2. Install
 [JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ URL: http://www.apache.org/ http://spark.apache.org/
 BugReports: http://spark.apache.org/contributing.html
 SystemRequirements: Java (== 8)
 Depends:
-    R (>= 3.0),
+    R (>= 3.1),
     methods
 Suggests:
     knitr,

R/pkg/NAMESPACE

Lines changed: 9 additions & 12 deletions

@@ -28,9 +28,8 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
 
 # S3 methods exported
 export("sparkR.session")
-export("sparkR.init")
-export("sparkR.stop")
 export("sparkR.session.stop")
+export("sparkR.stop")
 export("sparkR.conf")
 export("sparkR.version")
 export("sparkR.uiWebUrl")
@@ -42,9 +41,6 @@ export("sparkR.callJStatic")
 
 export("install.spark")
 
-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 # MLlib integration
 exportMethods("glm",
               "spark.glm",
@@ -71,7 +67,8 @@ exportMethods("glm",
               "spark.fpGrowth",
               "spark.freqItemsets",
               "spark.associationRules",
-              "spark.findFrequentSequentialPatterns")
+              "spark.findFrequentSequentialPatterns",
+              "spark.assignClusters")
 
 # Job group lifecycle management methods
 export("setJobGroup",
@@ -151,15 +148,13 @@ exportMethods("arrange",
               "printSchema",
               "randomSplit",
               "rbind",
-              "registerTempTable",
               "rename",
               "repartition",
               "repartitionByRange",
               "rollup",
               "sample",
               "sample_frac",
               "sampleBy",
-              "saveAsParquetFile",
               "saveAsTable",
               "saveDF",
               "schema",
@@ -201,6 +196,7 @@ exportMethods("%<=>%",
               "acos",
               "add_months",
               "alias",
+              "approx_count_distinct",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
@@ -259,6 +255,7 @@ exportMethods("%<=>%",
               "dayofweek",
               "dayofyear",
               "decode",
+              "degrees",
               "dense_rank",
               "desc",
               "element_at",
@@ -341,6 +338,7 @@ exportMethods("%<=>%",
               "posexplode",
               "posexplode_outer",
               "quarter",
+              "radians",
               "rand",
               "randn",
               "rank",
@@ -354,6 +352,8 @@ exportMethods("%<=>%",
               "row_number",
               "rpad",
               "rtrim",
+              "schema_of_csv",
+              "schema_of_json",
               "second",
               "sha1",
               "sha2",
@@ -387,6 +387,7 @@ exportMethods("%<=>%",
               "tanh",
               "toDegrees",
               "toRadians",
+              "to_csv",
               "to_date",
               "to_json",
               "to_timestamp",
@@ -415,18 +416,14 @@ export("as.DataFrame",
        "cacheTable",
        "clearCache",
        "createDataFrame",
-       "createExternalTable",
        "createTable",
        "currentDatabase",
-       "dropTempTable",
        "dropTempView",
-       "jsonFile",
        "listColumns",
        "listDatabases",
        "listFunctions",
        "listTables",
        "loadDF",
-       "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",

R/pkg/R/DataFrame.R

Lines changed: 55 additions & 49 deletions

@@ -226,7 +226,9 @@ setMethod("showDF",
 
 #' show
 #'
-#' Print class and type information of a Spark object.
+#' If eager evaluation is enabled and the Spark object is a SparkDataFrame, evaluate the
+#' SparkDataFrame and print top rows of the SparkDataFrame, otherwise, print the class
+#' and type information of the Spark object.
 #'
 #' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
 #'
@@ -244,11 +246,33 @@ setMethod("showDF",
 #' @note show(SparkDataFrame) since 1.4.0
 setMethod("show", "SparkDataFrame",
           function(object) {
-            cols <- lapply(dtypes(object), function(l) {
-              paste(l, collapse = ":")
-            })
-            s <- paste(cols, collapse = ", ")
-            cat(paste(class(object), "[", s, "]\n", sep = ""))
+            allConf <- sparkR.conf()
+            prop <- allConf[["spark.sql.repl.eagerEval.enabled"]]
+            if (!is.null(prop) && identical(prop, "true")) {
+              argsList <- list()
+              argsList$x <- object
+              prop <- allConf[["spark.sql.repl.eagerEval.maxNumRows"]]
+              if (!is.null(prop)) {
+                numRows <- as.integer(prop)
+                if (numRows > 0) {
+                  argsList$numRows <- numRows
+                }
+              }
+              prop <- allConf[["spark.sql.repl.eagerEval.truncate"]]
+              if (!is.null(prop)) {
+                truncate <- as.integer(prop)
+                if (truncate > 0) {
+                  argsList$truncate <- truncate
+                }
+              }
+              do.call(showDF, argsList)
+            } else {
+              cols <- lapply(dtypes(object), function(l) {
+                paste(l, collapse = ":")
+              })
+              s <- paste(cols, collapse = ", ")
+              cat(paste(class(object), "[", s, "]\n", sep = ""))
+            }
           })
 
 #' DataTypes
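The properties read by the new show() branch are ordinary session confs, so the eager-evaluation path can be exercised as below (a minimal sketch; the dataset is a placeholder):

# With eager evaluation enabled, typing `df` at the REPL evaluates the plan
# and prints its top rows via showDF() instead of the class/schema summary.
sparkR.session(sparkConfig = list(
  "spark.sql.repl.eagerEval.enabled" = "true",
  "spark.sql.repl.eagerEval.maxNumRows" = "10",  # forwarded as showDF(numRows = 10)
  "spark.sql.repl.eagerEval.truncate" = "20"))   # forwarded as showDF(truncate = 20)
df <- createDataFrame(faithful)
df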
@@ -497,32 +521,6 @@ setMethod("createOrReplaceTempView",
             invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
           })
 
-#' (Deprecated) Register Temporary Table
-#'
-#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
-#' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
-#'
-#' @seealso \link{createOrReplaceTempView}
-#' @rdname registerTempTable-deprecated
-#' @name registerTempTable
-#' @aliases registerTempTable,SparkDataFrame,character-method
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' registerTempTable(df, "json_df")
-#' new_df <- sql("SELECT * FROM json_df")
-#'}
-#' @note registerTempTable since 1.4.0
-setMethod("registerTempTable",
-          signature(x = "SparkDataFrame", tableName = "character"),
-          function(x, tableName) {
-            .Deprecated("createOrReplaceTempView")
-            invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
-          })
-
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
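Since its deprecation registerTempTable had been a one-line forward to createOrReplaceTempView, so callers migrate by renaming the call:

df <- read.json("path/to/file.json")
# registerTempTable(df, "json_df")   # removed above
createOrReplaceTempView(df, "json_df")
new_df <- sql("SELECT * FROM json_df")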
@@ -768,6 +766,13 @@ setMethod("repartition",
 #' \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
 #'            using \code{spark.sql.shuffle.partitions} as number of partitions.}
 #'}
+#' At least one partition-by expression must be specified.
+#' When no explicit sort order is specified, "ascending nulls first" is assumed.
+#'
+#' Note that due to performance reasons this method uses sampling to estimate the ranges.
+#' Hence, the output may not be consistent, since sampling can return different values.
+#' The sample size can be controlled by the config
+#' \code{spark.sql.execution.rangeExchange.sampleSizePerPartition}.
 #'
 #' @param x a SparkDataFrame.
 #' @param numPartitions the number of partitions to use.
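A short sketch of the documented sampling caveat (mtcars is a stand-in dataset):

df <- createDataFrame(mtcars)
# Range partitioning samples df$cyl to estimate split points, so exact
# partition boundaries can vary between runs.
df2 <- repartitionByRange(df, col = df$cyl)
# Defaults to spark.sql.shuffle.partitions when numPartitions is omitted.
getNumPartitions(df2)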
@@ -822,7 +827,6 @@ setMethod("repartitionByRange",
 #' toJSON
 #'
 #' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
-#'
 #' Each row is turned into a JSON document with columns as different fields.
 #' The returned SparkDataFrame has a single character column with the name \code{value}
 #'
@@ -932,7 +936,6 @@ setMethod("write.orc",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' write.parquet(df, "/tmp/sparkr-tmp1/")
-#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
 #'}
 #' @note write.parquet since 1.6.0
 setMethod("write.parquet",
@@ -943,17 +946,6 @@ setMethod("write.parquet",
             invisible(handledCallJMethod(write, "parquet", path))
           })
 
-#' @rdname write.parquet
-#' @name saveAsParquetFile
-#' @aliases saveAsParquetFile,SparkDataFrame,character-method
-#' @note saveAsParquetFile since 1.4.0
-setMethod("saveAsParquetFile",
-          signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
-            .Deprecated("write.parquet")
-            write.parquet(x, path)
-          })
-
 #' Save the content of SparkDataFrame in a text file at the specified path.
 #'
 #' Save the content of the SparkDataFrame in a text file at the specified path.
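Likewise, the removed saveAsParquetFile wrapper only forwarded to write.parquet, so the rename is the whole migration:

# saveAsParquetFile(df, "/tmp/sparkr-tmp2/")   # removed above
write.parquet(df, "/tmp/sparkr-tmp2/")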
@@ -2738,15 +2730,29 @@ setMethod("union",
             dataFrame(unioned)
           })
 
-#' unionAll is deprecated - use union instead
-#' @rdname union
-#' @name unionAll
+#' Return a new SparkDataFrame containing the union of rows.
+#'
+#' This is an alias for \code{union}.
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the unionAll operation.
+#' @family SparkDataFrame functions
 #' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname unionAll
+#' @name unionAll
+#' @seealso \link{union}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' unionAllDF <- unionAll(df1, df2)
+#' }
 #' @note unionAll since 1.4.0
 setMethod("unionAll",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y) {
-            .Deprecated("union")
             union(x, y)
           })
 