@@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column"))
12981298# ' Sort a DataFrame by the specified column(s).
12991299# '
13001300# ' @param x A DataFrame to be sorted.
1301- # ' @param col Either a Column object or character vector indicating the field to sort on
1301+ # ' @param col A character or Column object vector indicating the fields to sort on
13021302# ' @param ... Additional sorting fields
1303+ # ' @param decreasing A logical argument indicating sorting order for columns when
1304+ # ' a character vector is specified for col
13031305# ' @return A DataFrame where all elements are sorted.
13041306# ' @rdname arrange
13051307# ' @name arrange
@@ -1312,23 +1314,52 @@ setClassUnion("characterOrColumn", c("character", "Column"))
13121314# ' path <- "path/to/file.json"
13131315# ' df <- jsonFile(sqlContext, path)
13141316# ' arrange(df, df$col1)
1315- # ' arrange(df, "col1")
13161317# ' arrange(df, asc(df$col1), desc(abs(df$col2)))
1318+ # ' arrange(df, "col1", decreasing = TRUE)
1319+ # ' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
13171320# ' }
setMethod("arrange",
          signature(x = "DataFrame", col = "Column"),
          function(x, col, ...) {
            # Collect the leading sort column together with any additional
            # sort columns, then extract the `jc` slot from each of them.
            sortCols <- list(col, ...)
            jcols <- lapply(sortCols, function(sortCol) sortCol@jc)

            # Call "sort" on the underlying sdf with the extracted references
            # and wrap the result with dataFrame().
            dataFrame(callJMethod(x@sdf, "sort", jcols))
          })
13311331
1332+ # ' @rdname arrange
1333+ # ' @export
setMethod("arrange",
          signature(x = "DataFrame", col = "character"),
          function(x, col, ..., decreasing = FALSE) {
            # Every sort key: `col` followed by the extra fields given in `...`
            by <- list(col, ...)
            keyCount <- length(by)

            # A single `decreasing` value is applied to all sort keys;
            # otherwise exactly one value per key is required.
            if (length(decreasing) == 1) {
              decreasing <- rep(decreasing, keyCount)
            }
            if (length(decreasing) != keyCount) {
              stop("Arguments 'col' and 'decreasing' must have the same length")
            }

            # Turn each key into an ordered column, for example:
            #   [[1]] Column Species ASC
            #   [[2]] Column Petal_Length DESC
            jcols <- lapply(seq_along(by), function(idx) {
              direction <- if (decreasing[[idx]]) desc else asc
              direction(getColumn(x, by[[idx]]))
            })

            # Re-dispatch arrange with the DataFrame followed by the ordered
            # columns built above.
            do.call("arrange", c(x, jcols))
          })
1362+
13321363# ' @rdname arrange
13331364# ' @name orderby
13341365setMethod ("orderBy ",
@@ -1383,9 +1414,10 @@ setMethod("where",
13831414# ' @param x A Spark DataFrame
13841415# ' @param y A Spark DataFrame
13851416# ' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
1386- # ' Column expression. If joinExpr is omitted, join() wil perform a Cartesian join
1417+ # ' Column expression. If joinExpr is omitted, join() will perform a Cartesian join
13871418# ' @param joinType The type of join to perform. The following join types are available:
1388- # ' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
1419+ # ' 'inner', 'outer', 'full', 'fullouter', 'leftouter', 'left_outer', 'left',
1420+ # ' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
13891421# ' @return A DataFrame containing the result of the join operation.
13901422# ' @rdname join
13911423# ' @name join
@@ -1410,11 +1442,15 @@ setMethod("join",
14101442 if (is.null(joinType )) {
14111443 sdf <- callJMethod(x @ sdf , " join" , y @ sdf , joinExpr @ jc )
14121444 } else {
1413- if (joinType %in% c(" inner" , " outer" , " left_outer" , " right_outer" , " semijoin" )) {
1445+ if (joinType %in% c(" inner" , " outer" , " full" , " fullouter" ,
1446+ " leftouter" , " left_outer" , " left" ,
1447+ " rightouter" , " right_outer" , " right" , " leftsemi" )) {
1448+ joinType <- gsub(" _" , " " , joinType )
14141449 sdf <- callJMethod(x @ sdf , " join" , y @ sdf , joinExpr @ jc , joinType )
14151450 } else {
14161451 stop(" joinType must be one of the following types: " ,
1417- " 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'" )
1452+ " 'inner', 'outer', 'full', 'fullouter', 'leftouter', 'left_outer', 'left',
1453+ 'rightouter', 'right_outer', 'right', 'leftsemi'" )
14181454 }
14191455 }
14201456 }
@@ -1795,17 +1831,15 @@ setMethod("fillna",
17951831 if (length(colNames ) == 0 || ! all(colNames != " " )) {
17961832 stop(" value should be an a named list with each name being a column name." )
17971833 }
1798-
1799- # Convert to the named list to an environment to be passed to JVM
1800- valueMap <- new.env()
1801- for (col in colNames ) {
1802- # Check each item in the named list is of valid type
1803- v <- value [[col ]]
1834+ # Check each item in the named list is of valid type
1835+ lapply(value , function (v ) {
18041836 if (! (class(v ) %in% c(" integer" , " numeric" , " character" ))) {
18051837 stop(" Each item in value should be an integer, numeric or charactor." )
18061838 }
1807- valueMap [[col ]] <- v
1808- }
1839+ })
1840+
1841+ # Convert the named list to an environment to be passed to JVM
1842+ valueMap <- convertNamedListToEnv(value )
18091843
18101844 # When value is a named list, caller is expected not to pass in cols
18111845 if (! is.null(cols )) {
@@ -1828,36 +1862,6 @@ setMethod("fillna",
18281862 dataFrame(sdf )
18291863 })
18301864
1831- # ' crosstab
1832- # '
1833- # ' Computes a pair-wise frequency table of the given columns. Also known as a contingency
1834- # ' table. The number of distinct values for each column should be less than 1e4. At most 1e6
1835- # ' non-zero pair frequencies will be returned.
1836- # '
1837- # ' @param col1 name of the first column. Distinct items will make the first item of each row.
1838- # ' @param col2 name of the second column. Distinct items will make the column names of the output.
1839- # ' @return a local R data.frame representing the contingency table. The first column of each row
1840- # ' will be the distinct values of `col1` and the column names will be the distinct values
1841- # ' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
1842- # ' occurrences will have zero as their counts.
1843- # '
1844- # ' @rdname statfunctions
1845- # ' @name crosstab
1846- # ' @export
1847- # ' @examples
1848- # ' \dontrun{
1849- # ' df <- jsonFile(sqlCtx, "/path/to/file.json")
1850- # ' ct = crosstab(df, "title", "gender")
1851- # ' }
1852- setMethod ("crosstab ",
1853- signature(x = " DataFrame" , col1 = " character" , col2 = " character" ),
1854- function (x , col1 , col2 ) {
1855- statFunctions <- callJMethod(x @ sdf , " stat" )
1856- sct <- callJMethod(statFunctions , " crosstab" , col1 , col2 )
1857- collect(dataFrame(sct ))
1858- })
1859-
1860-
18611865# ' This function downloads the contents of a DataFrame into an R data.frame.
18621866# ' Since data.frames are held in memory, ensure that you have enough memory
18631867# ' in your system to accommodate the contents.
@@ -1879,5 +1883,34 @@ setMethod("as.data.frame",
18791883 stop(paste(" Unused argument(s): " , paste(list (... ), collapse = " , " )))
18801884 }
18811885 collect(x )
1882- }
1883- )
1886+ })
1887+
1888+ # ' The specified DataFrame is attached to the R search path. This means that
1889+ # ' the DataFrame is searched by R when evaluating a variable, so columns in
1890+ # ' the DataFrame can be accessed by simply giving their names.
1891+ # '
1892+ # ' @rdname attach
1893+ # ' @title Attach DataFrame to R search path
1894+ # ' @param what (DataFrame) The DataFrame to attach
1895+ # ' @param pos (integer) Specify position in search() where to attach.
1896+ # ' @param name (character) Name to use for the attached DataFrame. Names
1897+ # ' starting with package: are reserved for library.
1898+ # ' @param warn.conflicts (logical) If TRUE, warnings are printed about conflicts
1899+ # ' from attaching the database, unless that DataFrame contains an object named .conflicts.OK
1900+ # ' @examples
1901+ # ' \dontrun{
1902+ # ' attach(irisDf)
1903+ # ' summary(Sepal_Width)
1904+ # ' }
1905+ # ' @seealso \link{detach}
setMethod("attach",
          signature(what = "DataFrame"),
          function(what, pos = 2, name = deparse(substitute(what)),
                   warn.conflicts = TRUE) {
            cols <- columns(what)
            stopifnot(length(cols) > 0)

            # Bind every column of `what` under its own name in a fresh
            # environment, which is then attached in place of the DataFrame.
            newEnv <- new.env()
            for (idx in seq_along(cols)) {
              colName <- cols[idx]
              assign(x = colName, value = what[, colName], envir = newEnv)
            }

            attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
          })
0 commit comments