@@ -169,8 +169,8 @@ setMethod("isLocal",
 #' }
 setMethod("showDF",
           signature(x = "DataFrame"),
-          function(x, numRows = 20) {
-            s <- callJMethod(x@sdf, "showString", numToInt(numRows))
+          function(x, numRows = 20, truncate = TRUE) {
+            s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
             cat(s)
           })
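+# Illustrative sketch (editor's addition, not part of the patch; `df` is any
+# hypothetical DataFrame): the new truncate argument allows printing wide
+# cell values in full.
+#   showDF(df, numRows = 5, truncate = FALSE)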
 
@@ -255,6 +255,16 @@ setMethod("names",
             columns(x)
           })
 
+#' @rdname columns
+setMethod("names<-",
+          signature(x = "DataFrame"),
+          function(x, value) {
+            if (!is.null(value)) {
+              sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
+              dataFrame(sdf)
+            }
+          })
+
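+# Illustrative sketch (editor's addition, not part of the patch; assumes a
+# two-column DataFrame `df`):
+#   names(df)                 # e.g. c("age", "name")
+#   names(df) <- c("a", "b")  # rebinds df to a new DataFrame with renamed columns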
 #' Register Temporary Table
 #'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
@@ -473,6 +483,18 @@ setMethod("distinct",
             dataFrame(sdf)
           })
 
+#' @title Distinct rows in a DataFrame
+#'
+#' @description Returns a new DataFrame containing distinct rows in this DataFrame
+#'
+#' @rdname unique
+#' @aliases distinct
+setMethod("unique",
+          signature(x = "DataFrame"),
+          function(x) {
+            distinct(x)
+          })
+
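+# Illustrative sketch (editor's addition): unique() simply delegates to
+# distinct(), so for any DataFrame `df`:
+#   unique(df)   # drops duplicate rows, same as distinct(df)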
 #' Sample
 #'
 #' Return a sampled subset of this DataFrame using a random seed.
@@ -534,6 +556,58 @@ setMethod("count",
             callJMethod(x@sdf, "count")
           })
 
+#' @title Number of rows for a DataFrame
+#' @description Returns the number of rows in a DataFrame
+#'
+#' @name nrow
+#'
+#' @rdname nrow
+#' @aliases count
+setMethod("nrow",
+          signature(x = "DataFrame"),
+          function(x) {
+            count(x)
+          })
+
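+# Illustrative sketch (editor's addition): unlike base R, where nrow() is free
+# on an in-memory data.frame, this delegates to count() and triggers a Spark job.
+#   nrow(df)   # equivalent to count(df)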
+#' Returns the number of columns in a DataFrame
+#'
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname ncol
+#' @export
+#' @examples
+#' \dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' ncol(df)
+#' }
+setMethod("ncol",
+          signature(x = "DataFrame"),
+          function(x) {
+            length(columns(x))
+          })
+
+#' Returns the dimensions (number of rows and columns) of a DataFrame
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname dim
+#' @export
+#' @examples
+#' \dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' dim(df)
+#' }
+setMethod("dim",
+          signature(x = "DataFrame"),
+          function(x) {
+            c(count(x), ncol(x))
+          })
+
 #' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
 #'
 #' @param x A SparkSQL DataFrame
@@ -1205,6 +1279,15 @@ setMethod("join",
             dataFrame(sdf)
           })
 
+#' @rdname merge
+#' @aliases join
+setMethod("merge",
+          signature(x = "DataFrame", y = "DataFrame"),
+          function(x, y, joinExpr = NULL, joinType = NULL, ...) {
+            join(x, y, joinExpr, joinType)
+          })
+
+
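+# Illustrative sketch (editor's addition; `df1` and `df2` are hypothetical
+# DataFrames sharing a "key" column):
+#   merge(df1, df2, df1$key == df2$key, "inner")   # delegates to join()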
 #' UnionAll
 #'
 #' Return a new DataFrame containing the union of rows in this DataFrame
@@ -1231,6 +1314,22 @@ setMethod("unionAll",
             dataFrame(unioned)
           })
 
+#' @title Union two or more DataFrames
+#'
+#' @description Returns a new DataFrame containing rows of all parameters.
+#'
+#' @rdname rbind
+#' @aliases unionAll
+setMethod("rbind",
+          signature(... = "DataFrame"),
+          function(x, ..., deparse.level = 1) {
+            # base case: exactly two DataFrames; otherwise fold over ... via Recall
+            if (nargs() == 3) {
+              unionAll(x, ...)
+            } else {
+              unionAll(x, Recall(..., deparse.level = 1))
+            }
+          })
+
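+# Illustrative sketch (editor's addition): rbind() folds unionAll() over its
+# arguments, so every DataFrame must have the same schema.
+#   rbind(df1, df2)        # unionAll(df1, df2)
+#   rbind(df1, df2, df3)   # equivalent to unionAll(df1, unionAll(df2, df3))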
 #' Intersect
 #'
 #' Return a new DataFrame containing rows only in both this DataFrame
@@ -1314,21 +1413,23 @@ setMethod("except",
 #' write.df(df, "myfile", "parquet", "overwrite")
 #' }
 setMethod("write.df",
-          signature(df = "DataFrame", path = 'character'),
+          signature(df = "DataFrame", path = "character"),
           function(df, path, source = NULL, mode = "append", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
               source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
                                     "org.apache.spark.sql.parquet")
             }
             allModes <- c("append", "overwrite", "error", "ignore")
+            # nolint start
             if (!(mode %in% allModes)) {
               stop('mode should be one of "append", "overwrite", "error", "ignore"')
             }
+            # nolint end
             jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
             options <- varargsToEnv(...)
             if (!is.null(path)) {
-              options[['path']] = path
+              options[["path"]] <- path
             }
             callJMethod(df@sdf, "save", source, jmode, options)
           })
@@ -1337,7 +1438,7 @@ setMethod("write.df",
 #' @aliases saveDF
 #' @export
 setMethod("saveDF",
-          signature(df = "DataFrame", path = 'character'),
+          signature(df = "DataFrame", path = "character"),
           function(df, path, source = NULL, mode = "append", ...){
             write.df(df, path, source, mode, ...)
           })
@@ -1375,18 +1476,20 @@ setMethod("saveDF",
 #' saveAsTable(df, "myfile")
 #' }
 setMethod("saveAsTable",
-          signature(df = "DataFrame", tableName = 'character', source = 'character',
-                    mode = 'character'),
+          signature(df = "DataFrame", tableName = "character", source = "character",
+                    mode = "character"),
           function(df, tableName, source = NULL, mode = "append", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
               source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
                                     "org.apache.spark.sql.parquet")
             }
             allModes <- c("append", "overwrite", "error", "ignore")
+            # nolint start
             if (!(mode %in% allModes)) {
               stop('mode should be one of "append", "overwrite", "error", "ignore"')
             }
+            # nolint end
             jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
             options <- varargsToEnv(...)
             callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
@@ -1430,6 +1533,19 @@ setMethod("describe",
             dataFrame(sdf)
           })
 
+#' @title Summary
+#'
+#' @description Computes statistics for numeric columns of the DataFrame
+#'
+#' @rdname summary
+#' @aliases describe
+setMethod("summary",
+          signature(x = "DataFrame"),
+          function(x) {
+            describe(x)
+          })
+
+
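+# Illustrative sketch (editor's addition): summary() delegates to describe(),
+# which returns a DataFrame of count/mean/stddev/min/max for numeric columns.
+#   collect(summary(df))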
 #' dropna
 #'
 #' Returns a new DataFrame omitting rows with null values.
@@ -1554,3 +1670,31 @@ setMethod("fillna",
             }
             dataFrame(sdf)
           })
+
+#' crosstab
+#'
+#' Computes a pair-wise frequency table of the given columns. Also known as a contingency
+#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
+#' non-zero pair frequencies will be returned.
+#'
+#' @param col1 name of the first column. Distinct items will make the first item of each row.
+#' @param col2 name of the second column. Distinct items will make the column names of the output.
+#' @return a local R data.frame representing the contingency table. The first column of each row
+#'         will be the distinct values of `col1` and the column names will be the distinct values
+#'         of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
+#'         occurrences will have zero as their counts.
+#'
+#' @rdname statfunctions
+#' @export
+#' @examples
+#' \dontrun{
+#' df <- jsonFile(sqlCtx, "/path/to/file.json")
+#' ct <- crosstab(df, "title", "gender")
+#' }
+setMethod("crosstab",
+          signature(x = "DataFrame", col1 = "character", col2 = "character"),
+          function(x, col1, col2) {
+            statFunctions <- callJMethod(x@sdf, "stat")
+            sct <- callJMethod(statFunctions, "crosstab", col1, col2)
+            collect(dataFrame(sct))
+          })
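+# Illustrative sketch (editor's addition; hypothetical "title" and "gender"
+# columns): the result is a local R data.frame whose first column is named
+# "title_gender", with one count column per distinct gender value.
+#   ct <- crosstab(df, "title", "gender")
+#   class(ct)   # "data.frame"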