
Commit 5fd20b6

vectorijk authored and jkbradley committed
[SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib changes
## What changes were proposed in this pull request?

R Docs changes include typos, format, layout.

## How was this patch tested?

Test locally.

Author: Kai Jiang <[email protected]>

Closes #13394 from vectorijk/spark-15490.
1 parent 63470af commit 5fd20b6

File tree

10 files changed (+123, -88 lines)

R/pkg/R/DataFrame.R

Lines changed: 51 additions & 40 deletions
@@ -23,9 +23,11 @@ NULL
 setOldClass("jobj")
 setOldClass("structType")

-#' @title S4 class that represents a SparkDataFrame
-#' @description DataFrames can be created using functions like \link{createDataFrame},
-#' \link{read.json}, \link{table} etc.
+#' S4 class that represents a SparkDataFrame
+#'
+#' DataFrames can be created using functions like \link{createDataFrame},
+#' \link{read.json}, \link{table} etc.
+#'
 #' @family SparkDataFrame functions
 #' @rdname SparkDataFrame
 #' @docType class
@@ -629,8 +631,6 @@ setMethod("repartition",
 #'
 #' @param x A SparkDataFrame
 #' @return A StringRRDD of JSON objects
-#' @family SparkDataFrame functions
-#' @rdname tojson
 #' @noRd
 #' @examples
 #'\dontrun{
@@ -648,7 +648,7 @@ setMethod("toJSON",
             RDD(jrdd, serializedMode = "string")
           })

-#' write.json
+#' Save the contents of SparkDataFrame as a JSON file
 #'
 #' Save the contents of a SparkDataFrame as a JSON file (one object per line). Files written out
 #' with this method can be read back in as a SparkDataFrame using read.json().
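For context, a minimal sketch of the write.json/read.json round trip documented above (not part of this commit; the path is illustrative and an initialized SparkR 2.0 session is assumed):

    df <- createDataFrame(faithful)          # faithful is a built-in R data.frame
    write.json(df, "/tmp/faithful.json")     # one JSON object per line
    df2 <- read.json("/tmp/faithful.json")   # read back as a SparkDataFrame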
@@ -675,7 +675,7 @@ setMethod("write.json",
             invisible(callJMethod(write, "json", path))
           })

-#' write.parquet
+#' Save the contents of SparkDataFrame as a Parquet file, preserving the schema.
 #'
 #' Save the contents of a SparkDataFrame as a Parquet file, preserving the schema. Files written out
 #' with this method can be read back in as a SparkDataFrame using read.parquet().
@@ -713,9 +713,9 @@ setMethod("saveAsParquetFile",
             write.parquet(x, path)
           })

-#' write.text
+#' Save the content of SparkDataFrame in a text file at the specified path.
 #'
-#' Saves the content of the SparkDataFrame in a text file at the specified path.
+#' Save the content of the SparkDataFrame in a text file at the specified path.
 #' The SparkDataFrame must have only one column of string type with the name "value".
 #' Each row becomes a new line in the output file.
 #'
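A quick sketch of the "value"-column requirement described above (illustrative path, assuming an active SparkR session):

    txt <- createDataFrame(data.frame(value = c("line one", "line two"),
                                      stringsAsFactors = FALSE))
    write.text(txt, "/tmp/lines")   # each row becomes one line in the output file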
@@ -820,8 +820,6 @@ setMethod("sample_frac",
             sample(x, withReplacement, fraction, seed)
           })

-#' nrow
-#'
 #' Returns the number of rows in a SparkDataFrame
 #'
 #' @param x A SparkDataFrame
@@ -874,6 +872,8 @@ setMethod("ncol",
             length(columns(x))
           })

+#' Returns the dimensions of SparkDataFrame
+#'
 #' Returns the dimensions (number of rows and columns) of a SparkDataFrame
 #' @param x a SparkDataFrame
 #'
@@ -2012,8 +2012,9 @@ setMethod("join",
             dataFrame(sdf)
           })

+#' Merges two data frames
+#'
 #' @name merge
-#' @title Merges two data frames
 #' @param x the first data frame to be joined
 #' @param y the second data frame to be joined
 #' @param by a character vector specifying the join columns. If by is not
@@ -2127,7 +2128,6 @@ setMethod("merge",
             joinRes
           })

-#'
 #' Creates a list of columns by replacing the intersected ones with aliases.
 #' The name of the alias column is formed by concatanating the original column name and a suffix.
 #'
@@ -2182,8 +2182,9 @@ setMethod("unionAll",
             dataFrame(unioned)
           })

-#' @title Union two or more SparkDataFrames
-#' @description Returns a new SparkDataFrame containing rows of all parameters.
+#' Union two or more SparkDataFrames
+#'
+#' Returns a new SparkDataFrame containing rows of all parameters.
 #'
 #' @rdname rbind
 #' @name rbind
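For context, a small sketch of the documented union semantics (illustrative, assuming an active SparkR session; the schemas must match):

    df1 <- createDataFrame(mtcars[1:3, ])
    df2 <- createDataFrame(mtcars[4:6, ])
    nrow(rbind(df1, df2))   # 6: rows of all arguments, in order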
@@ -2254,20 +2255,22 @@ setMethod("except",
             dataFrame(excepted)
           })

-#' Save the contents of the SparkDataFrame to a data source
+#' Save the contents of SparkDataFrame to a data source.
 #'
 #' The data source is specified by the `source` and a set of options (...).
 #' If `source` is not specified, the default data source configured by
 #' spark.sql.sources.default will be used.
 #'
-#' Additionally, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes: \cr
-#'  append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
-#'  overwrite: Existing data is expected to be overwritten by the contents of this
-#'  SparkDataFrame. \cr
-#'  error: An exception is expected to be thrown. \cr
-#'  ignore: The save operation is expected to not save the contents of the SparkDataFrame
-#'  and to not change the existing data. \cr
+#' Additionally, mode is used to specify the behavior of the save operation when data already
+#' exists in the data source. There are four modes:
+#' \itemize{
+#'   \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
+#'   \item overwrite: Existing data is expected to be overwritten by the contents of this
+#'         SparkDataFrame.
+#'   \item error: An exception is expected to be thrown.
+#'   \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
+#'         and to not change the existing data.
+#' }
 #'
 #' @param df A SparkDataFrame
 #' @param path A name for the table
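The four modes map onto the mode argument of write.df; a minimal sketch (illustrative path and source, assuming an active SparkR session):

    df <- createDataFrame(faithful)
    # mode is one of "append", "overwrite", "error" (the default), "ignore"
    write.df(df, path = "/tmp/faithful.parquet", source = "parquet", mode = "overwrite")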
@@ -2315,8 +2318,6 @@ setMethod("saveDF",
             write.df(df, path, source, mode, ...)
           })

-#' saveAsTable
-#'
 #' Save the contents of the SparkDataFrame to a data source as a table
 #'
 #' The data source is specified by the `source` and a set of options (...).
@@ -2543,11 +2544,12 @@ setMethod("fillna",
             dataFrame(sdf)
           })

+#' Download data from a SparkDataFrame into a data.frame
+#'
 #' This function downloads the contents of a SparkDataFrame into an R's data.frame.
 #' Since data.frames are held in memory, ensure that you have enough memory
 #' in your system to accommodate the contents.
 #'
-#' @title Download data from a SparkDataFrame into a data.frame
 #' @param x a SparkDataFrame
 #' @return a data.frame
 #' @family SparkDataFrame functions
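A one-line sketch of the memory caveat above (illustrative, assuming an active SparkR session):

    local <- as.data.frame(createDataFrame(faithful))  # collects all rows to the driver
    class(local)                                       # "data.frame"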
@@ -2563,13 +2565,14 @@ setMethod("as.data.frame",
             as.data.frame(collect(x), row.names, optional, ...)
           })

+#' Attach SparkDataFrame to R search path
+#'
 #' The specified SparkDataFrame is attached to the R search path. This means that
 #' the SparkDataFrame is searched by R when evaluating a variable, so columns in
 #' the SparkDataFrame can be accessed by simply giving their names.
 #'
 #' @family SparkDataFrame functions
 #' @rdname attach
-#' @title Attach SparkDataFrame to R search path
 #' @param what (SparkDataFrame) The SparkDataFrame to attach
 #' @param pos (integer) Specify position in search() where to attach.
 #' @param name (character) Name to use for the attached SparkDataFrame. Names
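A minimal sketch of the documented search-path behavior (illustrative, assuming an active SparkR session):

    df <- createDataFrame(faithful)
    attach(df)
    head(select(df, eruptions))   # 'eruptions' resolves to a Column via the search path
    detach("df")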
@@ -2589,14 +2592,16 @@ setMethod("attach",
             attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
           })

+#' Evaluate a R expression in an environment constructed from a SparkDataFrame
+#'
 #' Evaluate a R expression in an environment constructed from a SparkDataFrame
 #' with() allows access to columns of a SparkDataFrame by simply referring to
 #' their name. It appends every column of a SparkDataFrame into a new
 #' environment. Then, the given expression is evaluated in this new
 #' environment.
 #'
 #' @rdname with
-#' @title Evaluate a R expression in an environment constructed from a SparkDataFrame
+#' @family SparkDataFrame functions
 #' @param data (SparkDataFrame) SparkDataFrame to use for constructing an environment.
 #' @param expr (expression) Expression to evaluate.
 #' @param ... arguments to be passed to future methods.
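A minimal sketch of the documented with() behavior (illustrative, assuming an active SparkR session):

    df <- createDataFrame(faithful)
    # columns are looked up in an environment built from df
    with(df, head(select(df, eruptions, waiting)))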
@@ -2612,10 +2617,12 @@ setMethod("with",
             eval(substitute(expr), envir = newEnv, enclos = newEnv)
           })

+#' Compactly display the structure of a dataset
+#'
 #' Display the structure of a SparkDataFrame, including column names, column types, as well as a
 #' a small sample of rows.
+#'
 #' @name str
-#' @title Compactly display the structure of a dataset
 #' @rdname str
 #' @family SparkDataFrame functions
 #' @param object a SparkDataFrame
@@ -2728,10 +2735,11 @@ setMethod("drop",
             base::drop(x)
           })

+#' Compute histogram statistics for given column
+#'
 #' This function computes a histogram for a given SparkR Column.
 #'
 #' @name histogram
-#' @title Histogram
 #' @param nbins the number of bins (optional). Default value is 10.
 #' @param df the SparkDataFrame containing the Column to build the histogram from.
 #' @param colname the name of the column to build the histogram from.
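A minimal sketch of the histogram API documented above (illustrative, assuming an active SparkR session):

    df <- createDataFrame(faithful)
    h <- histogram(df, "eruptions", nbins = 12)  # local data.frame of bins and counts
    head(h)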
@@ -2847,18 +2855,21 @@ setMethod("histogram",
             return(histStats)
           })

-#' Saves the content of the SparkDataFrame to an external database table via JDBC
+#' Save the content of SparkDataFrame to an external database table via JDBC.
 #'
-#' Additional JDBC database connection properties can be set (...)
+#' Save the content of the SparkDataFrame to an external database table via JDBC. Additional JDBC
+#' database connection properties can be set (...)
 #'
 #' Also, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes: \cr
-#'  append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
-#'  overwrite: Existing data is expected to be overwritten by the contents of this
-#'  SparkDataFrame. \cr
-#'  error: An exception is expected to be thrown. \cr
-#'  ignore: The save operation is expected to not save the contents of the SparkDataFrame
-#'  and to not change the existing data. \cr
+#' data already exists in the data source. There are four modes:
+#' \itemize{
+#'   \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
+#'   \item overwrite: Existing data is expected to be overwritten by the contents of this
+#'         SparkDataFrame.
+#'   \item error: An exception is expected to be thrown.
+#'   \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
+#'         and to not change the existing data.
+#' }
 #'
 #' @param x A SparkDataFrame
 #' @param url JDBC database url of the form `jdbc:subprotocol:subname`
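A minimal write.jdbc sketch mirroring the modes above (the JDBC URL, table name, and credentials are illustrative placeholders):

    df <- createDataFrame(faithful)
    url <- "jdbc:postgresql://localhost:5432/testdb"
    write.jdbc(df, url, tableName = "faithful", mode = "append", user = "spark")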

R/pkg/R/RDD.R

Lines changed: 8 additions & 6 deletions
@@ -19,9 +19,11 @@

 setOldClass("jobj")

-#' @title S4 class that represents an RDD
-#' @description RDD can be created using functions like
+#' S4 class that represents an RDD
+#'
+#' RDD can be created using functions like
 #' \code{parallelize}, \code{textFile} etc.
+#'
 #' @rdname RDD
 #' @seealso parallelize, textFile
 #' @slot env An R environment that stores bookkeeping states of the RDD
@@ -497,9 +499,9 @@ setMethod("map",
             lapply(X, FUN)
           })

-#' Flatten results after apply a function to all elements
+#' Flatten results after applying a function to all elements
 #'
-#' This function return a new RDD by first applying a function to all
+#' This function returns a new RDD by first applying a function to all
 #' elements of this RDD, and then flattening the results.
 #'
 #' @param X The RDD to apply the transformation.
@@ -713,7 +715,7 @@ setMethod("sumRDD",
             reduce(x, "+")
           })

-#' Applies a function to all elements in an RDD, and force evaluation.
+#' Applies a function to all elements in an RDD, and forces evaluation.
 #'
 #' @param x The RDD to apply the function
 #' @param func The function to be applied.
@@ -737,7 +739,7 @@ setMethod("foreach",
             invisible(collect(mapPartitions(x, partition.func)))
           })

-#' Applies a function to each partition in an RDD, and force evaluation.
+#' Applies a function to each partition in an RDD, and forces evaluation.
 #'
 #' @examples
 #'\dontrun{
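These RDD methods are private (@noRd) in SparkR 2.0; the sketch below uses ::: access purely for illustration of the flatten/force-evaluation semantics, not as a supported usage:

    sc <- SparkR:::sparkR.init()
    rdd <- SparkR:::parallelize(sc, 1:3, 1L)
    flat <- SparkR:::flatMap(rdd, function(x) list(x, x * 10))   # apply, then flatten
    SparkR:::foreachPartition(flat, function(part) NULL)         # forces evaluation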

R/pkg/R/WindowSpec.R

Lines changed: 4 additions & 3 deletions
@@ -20,9 +20,10 @@
 #' @include generics.R jobj.R column.R
 NULL

-#' @title S4 class that represents a WindowSpec
-#' @description WindowSpec can be created by using window.partitionBy()
-#'              or window.orderBy()
+#' S4 class that represents a WindowSpec
+#'
+#' WindowSpec can be created by using window.partitionBy() or window.orderBy()
+#'
 #' @rdname WindowSpec
 #' @seealso \link{window.partitionBy}, \link{window.orderBy}
 #'
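A minimal sketch of constructing a WindowSpec as documented here (illustrative only; this constructor was renamed windowPartitionBy in later Spark 2.0 builds):

    df <- createDataFrame(mtcars)
    ws <- orderBy(window.partitionBy("am"), "mpg")   # partition, then order
    head(select(df, over(rank(), ws)))               # apply a window function over ws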

R/pkg/R/broadcast.R

Lines changed: 5 additions & 3 deletions
@@ -23,9 +23,11 @@
 .broadcastValues <- new.env()
 .broadcastIdToName <- new.env()

-# @title S4 class that represents a Broadcast variable
-# @description Broadcast variables can be created using the broadcast
-# function from a \code{SparkContext}.
+# S4 class that represents a Broadcast variable
+#
+# Broadcast variables can be created using the broadcast
+# function from a \code{SparkContext}.
+#
 # @rdname broadcast-class
 # @seealso broadcast
 #
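The Broadcast class is likewise private; a sketch with ::: access, for illustration only (names taken from this file):

    sc <- SparkR:::sparkR.init()
    b <- SparkR:::broadcast(sc, 1:1000)   # ship a read-only value to workers
    SparkR:::value(b)                     # retrieve the broadcast value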

R/pkg/R/column.R

Lines changed: 4 additions & 2 deletions
@@ -22,8 +22,10 @@ NULL

 setOldClass("jobj")

-#' @title S4 class that represents a SparkDataFrame column
-#' @description The column class supports unary, binary operations on SparkDataFrame columns
+#' S4 class that represents a SparkDataFrame column
+#'
+#' The column class supports unary, binary operations on SparkDataFrame columns
+#'
 #' @rdname column
 #'
 #' @slot jc reference to JVM SparkDataFrame column
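A minimal sketch of the unary/binary Column operations mentioned above (illustrative, assuming an active SparkR session):

    df <- createDataFrame(faithful)
    head(select(df, df$eruptions * 60))                   # binary op yields a new Column
    head(filter(df, df$waiting > 70 & df$eruptions < 3))  # Columns compose with & and comparisons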
