
Commit 09f4cea

felixcheung authored and shivaram committed
[SPARKR][DOCS] R code doc cleanup
## What changes were proposed in this pull request?

I ran a full pass from A to Z and fixed the obvious duplications, improper grouping, etc. There are still more doc issues to be cleaned up.

## How was this patch tested?

Manual tests.

Author: Felix Cheung <[email protected]>

Closes #13798 from felixcheung/rdocseealso.
1 parent 41e0ffb commit 09f4cea

File tree

8 files changed: +70, -84 lines changed


R/pkg/R/DataFrame.R

Lines changed: 18 additions & 21 deletions
@@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
           })
 
 #' (Deprecated) Register Temporary Table
+#'
 #' Registers a SparkDataFrame as a Temporary Table in the SQLContext
 #' @param x A SparkDataFrame
 #' @param tableName A character vector containing the name of the table
@@ -606,10 +607,10 @@ setMethod("unpersist",
 #'
 #' The following options for repartition are possible:
 #' \itemize{
-#'  \item{"Option 1"} {Return a new SparkDataFrame partitioned by
+#'  \item{1.} {Return a new SparkDataFrame partitioned by
 #'          the given columns into `numPartitions`.}
-#'  \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions`.}
-#'  \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s),
+#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
 #'          using `spark.sql.shuffle.partitions` as number of partitions.}
 #'}
 #' @param x A SparkDataFrame
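
For reference, a sketch of the three documented forms, assuming an existing SparkDataFrame `df` with columns `col1` and `col2` (not part of the diff):

```r
# Option 1: partition by the given columns into numPartitions partitions
df1 <- repartition(df, numPartitions = 10L, col = df$col1)
# Option 2: exactly numPartitions partitions
df2 <- repartition(df, 2L)
# Option 3: partition by column(s); spark.sql.shuffle.partitions sets the count
df3 <- repartition(df, col = df$col1, df$col2)
```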
@@ -1053,7 +1054,7 @@ setMethod("limit",
             dataFrame(res)
           })
 
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame
+#' Take the first NUM rows of a SparkDataFrame and return a the results as a R data.frame
 #'
 #' @family SparkDataFrame functions
 #' @rdname take
@@ -1076,7 +1077,7 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL,
+#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL,
 #' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
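
A brief usage sketch of the behavior both docs describe (`df` assumed):

```r
first5 <- take(df, 5)   # an R data.frame with the first 5 rows
first6 <- head(df)      # NUM defaults to 6, matching R's data.frame convention
```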
@@ -1157,7 +1158,6 @@ setMethod("toRDD",
 #'
 #' @param x a SparkDataFrame
 #' @return a GroupedData
-#' @seealso GroupedData
 #' @family SparkDataFrame functions
 #' @rdname groupBy
 #' @name groupBy
@@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the SparkDataFrame.
-#'             func should have only one parameter, to which a data.frame corresponds
+#'             func should have only one parameter, to which a R data.frame corresponds
 #'             to each partition will be passed.
-#'             The output of func should be a data.frame.
+#'             The output of func should be a R data.frame.
 #' @param schema The schema of the resulting SparkDataFrame after the function is applied.
 #'               It must match the output of func.
 #' @family SparkDataFrame functions
@@ -1291,9 +1291,9 @@ setMethod("dapply",
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the SparkDataFrame.
-#'             func should have only one parameter, to which a data.frame corresponds
+#'             func should have only one parameter, to which a R data.frame corresponds
 #'             to each partition will be passed.
-#'             The output of func should be a data.frame.
+#'             The output of func should be a R data.frame.
 #' @family SparkDataFrame functions
 #' @rdname dapplyCollect
 #' @name dapplyCollect
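
A minimal sketch of the contract these docs describe, assuming an active SparkR session; the column names and schema are illustrative only:

```r
df <- createDataFrame(data.frame(a = 1:3, b = c(0.5, 1.5, 2.5)))
# func receives each partition as an R data.frame and must return an R data.frame
schema <- structType(structField("a", "integer"),
                     structField("b", "double"),
                     structField("b1", "double"))
df1 <- dapply(df, function(p) cbind(p, p$b + 1), schema)
# dapplyCollect applies func the same way but returns an R data.frame directly
ldf <- dapplyCollect(df, function(p) p)
```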
@@ -1641,7 +1641,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
             }
           })
 
-#' @family SparkDataFrame functions
 #' @rdname select
 #' @export
 #' @note select(SparkDataFrame, Column) since 1.4.0
@@ -1654,7 +1653,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
             dataFrame(sdf)
           })
 
-#' @family SparkDataFrame functions
 #' @rdname select
 #' @export
 #' @note select(SparkDataFrame, list) since 1.4.0
@@ -2001,7 +1999,6 @@ setMethod("filter",
             dataFrame(sdf)
           })
 
-#' @family SparkDataFrame functions
 #' @rdname filter
 #' @name where
 #' @note where since 1.4.0
@@ -2222,11 +2219,13 @@ setMethod("merge",
             joinRes
           })
 
+#' Creates a list of columns by replacing the intersected ones with aliases
+#'
 #' Creates a list of columns by replacing the intersected ones with aliases.
 #' The name of the alias column is formed by concatanating the original column name and a suffix.
 #'
-#' @param x a SparkDataFrame on which the
-#' @param intersectedColNames a list of intersected column names
+#' @param x a SparkDataFrame
+#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
 #' @param suffix a suffix for the column name
 #' @return list of columns
 #'
@@ -2513,9 +2512,9 @@ setMethod("summary",
           })
 
 
-#' dropna
+#' A set of SparkDataFrame functions working with NA values
 #'
-#' Returns a new SparkDataFrame omitting rows with null values.
+#' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values.
 #'
 #' @param x A SparkDataFrame.
 #' @param how "any" or "all".
@@ -2567,9 +2566,7 @@ setMethod("na.omit",
             dropna(object, how, minNonNulls, cols)
           })
 
-#' fillna
-#'
-#' Replace null values.
+#' fillna - Replace null values.
 #'
 #' @param x A SparkDataFrame.
 #' @param value Value to replace null values with.
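
A usage sketch of the regrouped NA helpers (`df` assumed to contain nulls):

```r
cleaned  <- dropna(df, how = "any")  # drop rows with any null value
cleaned2 <- na.omit(df)              # R-style alias for dropna
filled   <- fillna(df, value = 0)    # replace nulls in numeric columns with 0
```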
@@ -2640,7 +2637,7 @@ setMethod("fillna",
             dataFrame(sdf)
           })
 
-#' Download data from a SparkDataFrame into a data.frame
+#' Download data from a SparkDataFrame into a R data.frame
 #'
 #' This function downloads the contents of a SparkDataFrame into an R's data.frame.
 #' Since data.frames are held in memory, ensure that you have enough memory
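
Given that memory caveat, a common pattern (sketch, `df` assumed) is to bound the result before downloading it:

```r
local_df <- collect(limit(df, 1000L))  # cap rows before pulling into R memory
```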

R/pkg/R/SQLContext.R

Lines changed: 3 additions & 3 deletions
@@ -67,7 +67,7 @@ dispatchFunc <- function(newFuncSig, x, ...) {
 }
 
 #' return the SparkSession
-#' @note getSparkSession since 2.0.0
+#' @noRd
 getSparkSession <- function() {
   if (exists(".sparkRsession", envir = .sparkREnv)) {
     get(".sparkRsession", envir = .sparkREnv)
@@ -77,7 +77,7 @@ getSparkSession <- function() {
 }
 
 #' infer the SQL type
-#' @note infer_type since 1.4.0
+#' @noRd
 infer_type <- function(x) {
   if (is.null(x)) {
     stop("can not infer type from NULL")
@@ -451,7 +451,7 @@ sql <- function(x, ...) {
 #' Create a SparkDataFrame from a SparkSQL Table
 #'
 #' Returns the specified Table as a SparkDataFrame. The Table must have already been registered
-#' in the SQLContext.
+#' in the SparkSession.
 #'
 #' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
 #' @return SparkDataFrame
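
The function being documented is not visible in this hunk; assuming it is SparkR's tableToDF, a sketch:

```r
createOrReplaceTempView(df, "people")  # register df under a table name first
people <- tableToDF("people")          # read it back as a SparkDataFrame
```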

R/pkg/R/column.R

Lines changed: 6 additions & 0 deletions
@@ -34,6 +34,11 @@ setOldClass("jobj")
 setClass("Column",
          slots = list(jc = "jobj"))
 
+#' A set of operations working with SparkDataFrame columns
+#' @rdname columnfunctions
+#' @name columnfunctions
+NULL
+
 setMethod("initialize", "Column", function(.Object, jc) {
   .Object@jc <- jc
   .Object
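
A sketch of the kind of Column operations the new columnfunctions stub groups onto one Rd page; the calls below are illustrative, with `df` assumed:

```r
ageD   <- cast(df$age, "double")            # type conversion on a Column
ageUp  <- alias(df$age + 1, "age_plus_1")   # arithmetic plus renaming
adults <- where(df, between(df$age, c(18, 65)))
```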
@@ -47,6 +52,7 @@ setMethod("column",
 
 #' @rdname show
 #' @name show
+#' @export
 #' @note show(Column) since 1.4.0
 setMethod("show", "Column",
           function(object) {

R/pkg/R/context.R

Lines changed: 3 additions & 2 deletions
@@ -225,9 +225,10 @@ setCheckpointDir <- function(sc, dirName) {
   invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
 }
 
-#' Run a function over a list of elements, distributing the computations with Spark.
+#' Run a function over a list of elements, distributing the computations with Spark
 #'
-#' Applies a function in a manner that is similar to doParallel or lapply to elements of a list.
+#' Run a function over a list of elements, distributing the computations with Spark. Applies a
+#' function in a manner that is similar to doParallel or lapply to elements of a list.
 #' The computations are distributed using Spark. It is conceptually the same as the following code:
 #'   lapply(list, func)
 #'
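
A sketch of the lapply-style contract described here; the exact signature has varied across versions (earlier builds also took the SparkContext as an argument), so treat it as illustrative:

```r
# distribute a simple computation over the elements of a local list
doubled <- spark.lapply(list(1, 2, 3), function(x) x * 2)
# conceptually the same as: lapply(list(1, 2, 3), function(x) x * 2)
```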

R/pkg/R/functions.R

Lines changed: 13 additions & 27 deletions
@@ -77,13 +77,14 @@ setMethod("acos",
             column(jc)
           })
 
-#' approxCountDistinct
+#' Returns the approximate number of distinct items in a group
 #'
-#' Aggregate function: returns the approximate number of distinct items in a group.
+#' Returns the approximate number of distinct items in a group. This is a column
+#' aggregate function.
 #'
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
-#' @family agg_funcs
+#' @return the approximate number of distinct items in a group.
 #' @export
 #' @examples \dontrun{approxCountDistinct(df$c)}
 #' @note approxCountDistinct(Column) since 1.4.0
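
Beyond the inline @examples, a sketch of using it as an aggregate (`df` and the grouping column `g` assumed):

```r
head(select(df, approxCountDistinct(df$c)))             # whole-frame estimate
head(agg(groupBy(df, "g"), approxCountDistinct(df$c)))  # per-group estimate
```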
@@ -234,7 +235,7 @@ setMethod("cbrt",
             column(jc)
           })
 
-#' ceil
+#' Computes the ceiling of the given value
 #'
 #' Computes the ceiling of the given value.
 #'
@@ -254,15 +255,16 @@ setMethod("ceil",
 #' Though scala functions has "col" function, we don't expose it in SparkR
 #' because we don't want to conflict with the "col" function in the R base
 #' package and we also have "column" function exported which is an alias of "col".
+#' @noRd
 col <- function(x) {
   column(callJStatic("org.apache.spark.sql.functions", "col", x))
 }
 
-#' column
+#' Returns a Column based on the given column name
 #'
 #' Returns a Column based on the given column name.
 #'
-#' @rdname col
+#' @rdname column
 #' @name column
 #' @family normal_funcs
 #' @export
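
A short illustration of the exported column() alias, `df` assumed:

```r
c1 <- column("age")              # build a Column from its name
head(select(df, column("age")))  # same effect as select(df, df$age)
```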
@@ -385,9 +387,9 @@ setMethod("cosh",
             column(jc)
           })
 
-#' count
+#' Returns the number of items in a group
 #'
-#' Aggregate function: returns the number of items in a group.
+#' Returns the number of items in a group. This is a column aggregate function.
 #'
 #' @rdname count
 #' @name count
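
A sketch of count as a column aggregate over grouped data (`df` assumed):

```r
head(agg(groupBy(df, "name"), count(df$name)))  # row count per name
```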
@@ -1193,7 +1195,7 @@ setMethod("sha1",
 #'
 #' Computes the signum of the given value.
 #'
-#' @rdname signum
+#' @rdname sign
 #' @name signum
 #' @family math_funcs
 #' @export
@@ -1717,7 +1719,7 @@ setMethod("datediff", signature(y = "Column"),
 
 #' hypot
 #'
-#' Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
+#' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow.
 #'
 #' @rdname hypot
 #' @name hypot
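
A sketch of hypot on two numeric Columns (`df` with columns `a` and `b` assumed):

```r
head(select(df, hypot(df$a, df$b)))  # sqrt(a^2 + b^2), overflow-safe
```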
@@ -1813,12 +1815,8 @@ setMethod("pmod", signature(y = "Column"),
           })
 
 
-#' Approx Count Distinct
-#'
-#' @family agg_funcs
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
-#' @return the approximate number of distinct items in a group.
 #' @export
 #' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
@@ -1918,10 +1916,6 @@ setMethod("least",
             column(jc)
           })
 
-#' ceiling
-#'
-#' Computes the ceiling of the given value.
-#'
 #' @rdname ceil
 #' @name ceiling
 #' @export
@@ -1933,11 +1927,7 @@ setMethod("ceiling",
             ceil(x)
           })
 
-#' sign
-#'
-#' Computes the signum of the given value.
-#'
-#' @rdname signum
+#' @rdname sign
 #' @name sign
 #' @export
 #' @examples \dontrun{sign(df$c)}
@@ -1961,10 +1951,6 @@ setMethod("n_distinct", signature(x = "Column"),
             countDistinct(x, ...)
           })
 
-#' n
-#'
-#' Aggregate function: returns the number of items in a group.
-#'
 #' @rdname count
 #' @name n
 #' @export
