@@ -987,7 +987,7 @@ setMethod("$<-", signature(x = "DataFrame"),
987987
# Virtual class union so a single method signature can accept either a numeric
# index or a character name (used by the `[[` method on DataFrame).
setClassUnion("numericOrcharacter", c("numeric", "character"))
989989
990- # ' @rdname select
990+ # ' @rdname subset
991991# ' @name [[
992992setMethod ("[[ ", signature(x = "DataFrame", i = "numericOrcharacter"),
993993 function (x , i ) {
@@ -998,7 +998,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
998998 getColumn(x , i )
999999 })
10001000
1001- # ' @rdname select
1001+ # ' @rdname subset
10021002# ' @name [
10031003setMethod ("[ ", signature(x = "DataFrame", i = "missing"),
10041004 function (x , i , j , ... ) {
@@ -1012,20 +1012,51 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
10121012 select(x , j )
10131013 })
10141014
#' @rdname subset
#' @name [
setMethod("[", signature(x = "DataFrame", i = "Column"),
          function(x, i, j, ...) {
            # It could handle i as "character" but it seems confusing and not required
            # https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
            #
            # Rows first: `i` is a Column expression used as the filter condition.
            filtered <- filter(x, i)
            if (!missing(j)) {
              # Columns second: re-dispatch to the column-only `[` method,
              # forwarding any extra arguments unchanged.
              filtered[, j, ...]
            } else {
              filtered
            }
          })
10281028
#' Subset
#'
#' Return subsets of DataFrame according to given conditions
#' @param x A DataFrame
#' @param subset A logical expression to filter on rows. If omitted, no row
#'        filtering is applied.
#' @param select expression for the single Column or a list of columns to select from the DataFrame
#' @param ... Additional arguments forwarded to the `[` method
#' @return A new DataFrame containing only the rows that meet the condition with selected columns
#' @export
#' @rdname subset
#' @name subset
#' @aliases [
#' @family subsetting functions
#' @examples
#' \dontrun{
#'   # Columns can be selected using `[[` and `[`
#'   df[[2]] == df[["age"]]
#'   df[,2] == df[,"age"]
#'   df[,c("name", "age")]
#'   # Or to filter rows
#'   df[df$age > 20,]
#'   # DataFrame can be subset on both rows and Columns
#'   df[df$name == "Smith", c(1,2)]
#'   df[df$age %in% c(19, 30), 1:2]
#'   subset(df, df$age %in% c(19, 30), 1:2)
#'   subset(df, df$age %in% c(19), select = c(1,2))
#'   # Column selection only (no row condition)
#'   subset(df, select = c(1,2))
#' }
setMethod("subset", signature(x = "DataFrame"),
          function(x, subset, select, ...) {
            if (missing(subset)) {
              if (missing(select)) {
                # Nothing to filter and nothing to select: the DataFrame is
                # returned unchanged.
                return(x)
              }
              # No row condition: use the column-only form of `[` instead of
              # passing a missing argument in the row position.
              x[, select, ...]
            } else {
              x[subset, select, ...]
            }
          })
1059+
10291060# ' Select
10301061# '
10311062# ' Selects a set of columns with names or Column expressions.
@@ -1034,22 +1065,17 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
10341065# ' @return A new DataFrame with selected columns
10351066# ' @export
10361067# ' @rdname select
1068+ # ' @name select
1069+ # ' @family subsetting functions
10371070# ' @examples
10381071# ' \dontrun{
10391072# ' select(df, "*")
10401073# ' select(df, "col1", "col2")
10411074# ' select(df, df$name, df$age + 1)
10421075# ' select(df, c("col1", "col2"))
10431076# ' select(df, list(df$name, df$age + 1))
1044- # ' # Columns can also be selected using `[[` and `[`
1045- # ' df[[2]] == df[["age"]]
1046- # ' df[,2] == df[,"age"]
1047- # ' df[,c("name", "age")]
10481077# ' # Similar to R data frames columns can also be selected using `$`
10491078# ' df$age
1050- # ' # It can also be subset on rows and Columns
1051- # ' df[df$name == "Smith", c(1,2)]
1052- # ' df[df$age %in% c(19, 30), 1:2]
10531079# ' }
10541080setMethod ("select ", signature(x = "DataFrame", col = "character"),
10551081 function (x , col , ... ) {
@@ -1121,7 +1147,7 @@ setMethod("selectExpr",
11211147# ' @return A DataFrame with the new column added.
11221148# ' @rdname withColumn
11231149# ' @name withColumn
1124- # ' @aliases mutate
1150+ # ' @aliases mutate transform
11251151# ' @export
11261152# ' @examples
11271153# '\dontrun{
@@ -1141,11 +1167,12 @@ setMethod("withColumn",
11411167# '
11421168# ' Return a new DataFrame with the specified columns added.
11431169# '
1144- # ' @param x A DataFrame
1170+ # ' @param .data A DataFrame
11451171# ' @param col a named argument of the form name = col
11461172# ' @return A new DataFrame with the new columns added.
11471173# ' @rdname withColumn
11481174# ' @name mutate
1175+ # ' @aliases withColumn transform
11491176# ' @export
11501177# ' @examples
11511178# '\dontrun{
@@ -1155,10 +1182,12 @@ setMethod("withColumn",
11551182# ' df <- jsonFile(sqlContext, path)
11561183# ' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
11571184# ' names(newDF) # Will contain newCol, newCol2
1185+ # ' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
11581186# ' }
11591187setMethod ("mutate ",
1160- signature(x = " DataFrame" ),
1161- function (x , ... ) {
1188+ signature(.data = " DataFrame" ),
1189+ function (.data , ... ) {
1190+ x <- .data
11621191 cols <- list (... )
11631192 stopifnot(length(cols ) > 0 )
11641193 stopifnot(class(cols [[1 ]]) == " Column" )
@@ -1173,6 +1202,16 @@ setMethod("mutate",
11731202 do.call(select , c(x , x $ " *" , cols ))
11741203 })
11751204
#' @export
#' @rdname withColumn
#' @name transform
#' @aliases withColumn mutate
setMethod("transform",
          signature(`_data` = "DataFrame"),
          function(`_data`, ...) {
            # The first formal of base::transform is named `_data`, so the
            # method has to use the same (backtick-quoted) name. All the work
            # is delegated to mutate().
            mutate(`_data`, ...)
          })
1214+
11761215# ' WithColumnRenamed
11771216# '
11781217# ' Rename an existing column in a DataFrame.
@@ -1300,6 +1339,7 @@ setMethod("orderBy",
13001339# ' @return A DataFrame containing only the rows that meet the condition.
13011340# ' @rdname filter
13021341# ' @name filter
1342+ # ' @family subsetting functions
13031343# ' @export
13041344# ' @examples
13051345# '\dontrun{
@@ -1699,9 +1739,9 @@ setMethod("dropna",
16991739# ' @name na.omit
17001740# ' @export
setMethod("na.omit",
          signature(object = "DataFrame"),
          function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
            # The first formal is named `object` to match stats::na.omit.
            # Arguments are handed to dropna() positionally and unmodified.
            dropna(object, how, minNonNulls, cols)
          })
17061746
17071747# ' fillna
0 commit comments