@@ -271,7 +271,7 @@ setMethod("show", "SparkDataFrame",
271271 paste(l , collapse = " :" )
272272 })
273273 s <- paste(cols , collapse = " , " )
274- cat(paste (class(object ), " [" , s , " ]\n " , sep = " " ))
274+ cat(paste0 (class(object ), " [" , s , " ]\n " ))
275275 }
276276 })
277277
@@ -1659,9 +1659,7 @@ setMethod("dapplyCollect",
16591659# '
16601660# ' @param cols grouping columns.
16611661# ' @param func a function to be applied to each group partition specified by grouping
1662- # ' column of the SparkDataFrame. The function \code{func} takes as argument
1663- # ' a key - grouping columns and a data frame - a local R data.frame.
1664- # ' The output of \code{func} is a local R data.frame.
1662+ # ' column of the SparkDataFrame. See Details.
16651663# ' @param schema the schema of the resulting SparkDataFrame after the function is applied.
16671665# ' The schema must match the output of \code{func}. It has to be defined for each
16671665# ' output column with preferred output column name and corresponding data type.
@@ -1671,29 +1669,43 @@ setMethod("dapplyCollect",
16711669# ' @aliases gapply,SparkDataFrame-method
16721670# ' @rdname gapply
16731671# ' @name gapply
1672+ # ' @details
1673+ # ' \code{func} is a function of two arguments. The first, usually named \code{key}
1674+ # ' (though this is not enforced), corresponds to the grouping key and will be an
1675+ # ' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
1676+ # ' to the grouping columns' values for the current group.
1677+ # '
1678+ # ' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
1679+ # ' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
1680+ # '
1681+ # ' The output of \code{func} must be a \code{data.frame} matching \code{schema} --
1682+ # ' in particular, this means the names of the output \code{data.frame} are irrelevant.
1683+ # '
16741684# ' @seealso \link{gapplyCollect}
16751685# ' @examples
16761686# '
16771687# ' \dontrun{
1678- # ' Computes the arithmetic mean of the second column by grouping
1679- # ' on the first and third columns. Output the grouping values and the average.
1688+ # ' # Computes the arithmetic mean of the second column by grouping
1689+ # ' # on the first and third columns. Output the grouping values and the average.
16801690# '
16811691# ' df <- createDataFrame (
16821692# ' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
16831693# ' c("a", "b", "c", "d"))
16841694# '
1685- # ' Here our output contains three columns, the key which is a combination of two
1686- # ' columns with data types integer and string and the mean which is a double.
1695+ # ' # Here our output contains three columns, the key which is a combination of two
1696+ # ' # columns with data types integer and string and the mean which is a double.
16871697# ' schema <- structType(structField("a", "integer"), structField("c", "string"),
16881698# ' structField("avg", "double"))
16891699# ' result <- gapply(
16901700# ' df,
16911701# ' c("a", "c"),
16921702# ' function(key, x) {
1703+ # ' # key will either be list(1L, '1') (for the group where a=1L,c='1') or
1704+ # ' # list(3L, '3') (for the group where a=3L,c='3')
16931705# ' y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
16941706# ' }, schema)
16951707# '
1696- # ' The schema also can be specified in a DDL-formatted string.
1708+ # ' # The schema can also be specified in a DDL-formatted string.
16971709# ' schema <- "a INT, c STRING, avg DOUBLE"
16981710# ' result <- gapply(
16991711# ' df,
@@ -1702,8 +1714,8 @@ setMethod("dapplyCollect",
17021714# ' y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
17031715# ' }, schema)
17041716# '
1705- # ' We can also group the data and afterwards call gapply on GroupedData.
1706- # ' For Example :
1717+ # ' # We can also group the data and afterwards call gapply on GroupedData.
1718+ # ' # For example :
17071719# ' gdf <- group_by(df, "a", "c")
17081720# ' result <- gapply(
17091721# ' gdf,
@@ -1712,15 +1724,15 @@ setMethod("dapplyCollect",
17121724# ' }, schema)
17131725# ' collect(result)
17141726# '
1715- # ' Result
1716- # ' ------
1717- # ' a c avg
1718- # ' 3 3 3.0
1719- # ' 1 1 1.5
1727+ # ' # Result
1728+ # ' # ------
1729+ # ' # a c avg
1730+ # ' # 3 3 3.0
1731+ # ' # 1 1 1.5
17201732# '
1721- # ' Fits linear models on iris dataset by grouping on the 'Species' column and
1722- # ' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
1723- # ' and 'Petal_Width' as training features.
1733+ # ' # Fits linear models on iris dataset by grouping on the 'Species' column and
1734+ # ' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
1735+ # ' # and 'Petal_Width' as training features.
17241736# '
17251737# ' df <- createDataFrame (iris)
17261738# ' schema <- structType(structField("(Intercept)", "double"),
@@ -1736,12 +1748,12 @@ setMethod("dapplyCollect",
17361748# ' }, schema)
17371749# ' collect(df1)
17381750# '
1739- # ' Result
1740- # ' ---------
1741- # ' Model (Intercept) Sepal_Width Petal_Length Petal_Width
1742- # ' 1 0.699883 0.3303370 0.9455356 -0.1697527
1743- # ' 2 1.895540 0.3868576 0.9083370 -0.6792238
1744- # ' 3 2.351890 0.6548350 0.2375602 0.2521257
1751+ # ' # Result
1752+ # ' # ---------
1753+ # ' # Model (Intercept) Sepal_Width Petal_Length Petal_Width
1754+ # ' # 1 0.699883 0.3303370 0.9455356 -0.1697527
1755+ # ' # 2 1.895540 0.3868576 0.9083370 -0.6792238
1756+ # ' # 3 2.351890 0.6548350 0.2375602 0.2521257
17451757# '
17461758# '}
17471759# ' @note gapply(SparkDataFrame) since 2.0.0
@@ -1759,20 +1771,30 @@ setMethod("gapply",
17591771# '
17601772# ' @param cols grouping columns.
17611773# ' @param func a function to be applied to each group partition specified by grouping
1762- # ' column of the SparkDataFrame. The function \code{func} takes as argument
1763- # ' a key - grouping columns and a data frame - a local R data.frame.
1764- # ' The output of \code{func} is a local R data.frame.
1774+ # ' column of the SparkDataFrame. See Details.
17651775# ' @return A data.frame.
17661776# ' @family SparkDataFrame functions
17671777# ' @aliases gapplyCollect,SparkDataFrame-method
17681778# ' @rdname gapplyCollect
17691779# ' @name gapplyCollect
1780+ # ' @details
1781+ # ' \code{func} is a function of two arguments. The first, usually named \code{key}
1782+ # ' (though this is not enforced), corresponds to the grouping key and will be an
1783+ # ' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
1784+ # ' to the grouping columns' values for the current group.
1785+ # '
1786+ # ' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
1787+ # ' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
1788+ # '
1789+ # ' The output of \code{func} must be a local R \code{data.frame}. \code{gapplyCollect} takes
1790+ # ' no \code{schema} argument, so the names of the output \code{data.frame} are kept in the result.
1791+ # '
17701792# ' @seealso \link{gapply}
17711793# ' @examples
17721794# '
17731795# ' \dontrun{
1774- # ' Computes the arithmetic mean of the second column by grouping
1775- # ' on the first and third columns. Output the grouping values and the average.
1796+ # ' # Computes the arithmetic mean of the second column by grouping
1797+ # ' # on the first and third columns. Output the grouping values and the average.
17761798# '
17771799# ' df <- createDataFrame (
17781800# ' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
@@ -1787,8 +1809,8 @@ setMethod("gapply",
17871809# ' y
17881810# ' })
17891811# '
1790- # ' We can also group the data and afterwards call gapply on GroupedData.
1791- # ' For Example :
1812+ # ' # We can also group the data and afterwards call gapplyCollect on GroupedData.
1813+ # ' # For example:
17921814# ' gdf <- group_by(df, "a", "c")
17931815# ' result <- gapplyCollect(
17941816# ' gdf,
@@ -1798,15 +1820,15 @@ setMethod("gapply",
17981820# ' y
17991821# ' })
18001822# '
1801- # ' Result
1802- # ' ------
1803- # ' key_a key_c mean_b
1804- # ' 3 3 3.0
1805- # ' 1 1 1.5
1823+ # ' # Result
1824+ # ' # ------
1825+ # ' # key_a key_c mean_b
1826+ # ' # 3 3 3.0
1827+ # ' # 1 1 1.5
18061828# '
1807- # ' Fits linear models on iris dataset by grouping on the 'Species' column and
1808- # ' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
1809- # ' and 'Petal_Width' as training features.
1829+ # ' # Fits linear models on iris dataset by grouping on the 'Species' column and
1830+ # ' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
1831+ # ' # and 'Petal_Width' as training features.
18101832# '
18111833# ' df <- createDataFrame (iris)
18121834# ' result <- gapplyCollect(
@@ -1818,12 +1840,12 @@ setMethod("gapply",
18181840# ' data.frame(t(coef(m)))
18191841# ' })
18201842# '
1821- # ' Result
1822- # '---------
1823- # ' Model X.Intercept. Sepal_Width Petal_Length Petal_Width
1824- # ' 1 0.699883 0.3303370 0.9455356 -0.1697527
1825- # ' 2 1.895540 0.3868576 0.9083370 -0.6792238
1826- # ' 3 2.351890 0.6548350 0.2375602 0.2521257
1843+ # ' # Result
1844+ # ' # ---------
1845+ # ' # Model X.Intercept. Sepal_Width Petal_Length Petal_Width
1846+ # ' # 1 0.699883 0.3303370 0.9455356 -0.1697527
1847+ # ' # 2 1.895540 0.3868576 0.9083370 -0.6792238
1848+ # ' # 3 2.351890 0.6548350 0.2375602 0.2521257
18271849# '
18281850# '}
18291851# ' @note gapplyCollect(SparkDataFrame) since 2.0.0
@@ -2735,10 +2757,10 @@ setMethod("merge",
27352757 colY <- joinY [[i ]]
27362758
27372759 if (colX %in% by ) {
2738- colX <- paste (colX , suffixes [1 ], sep = " " )
2760+ colX <- paste0 (colX , suffixes [1 ])
27392761 }
27402762 if (colY %in% by ) {
2741- colY <- paste (colY , suffixes [2 ], sep = " " )
2763+ colY <- paste0 (colY , suffixes [2 ])
27422764 }
27432765
27442766 colX <- getColumn(xsel , colX )
@@ -2753,7 +2775,7 @@ setMethod("merge",
27532775
27542776 # sorts the result by 'by' columns if sort = TRUE
27552777 if (sort && length(by ) > 0 ) {
2756- colNameWithSuffix <- paste (by , suffixes [2 ], sep = " " )
2778+ colNameWithSuffix <- paste0 (by , suffixes [2 ])
27572779 joinRes <- do.call(" arrange" , c(joinRes , colNameWithSuffix , decreasing = FALSE ))
27582780 }
27592781
@@ -2776,7 +2798,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
27762798 cols <- lapply(allColNames , function (colName ) {
27772799 col <- getColumn(x , colName )
27782800 if (colName %in% intersectedColNames ) {
2779- newJoin <- paste (colName , suffix , sep = " " )
2801+ newJoin <- paste0 (colName , suffix )
27802802 if (newJoin %in% allColNames ) {
27812803 stop(" The following column name: " , newJoin , " occurs more than once in the 'DataFrame'." ,
27822804 " Please use different suffixes for the intersected columns." )
0 commit comments