1919
2020setOldClass(" jobj" )
2121
22- # ' crosstab
23- # '
24- # ' Computes a pair-wise frequency table of the given columns. Also known as a contingency
22+ # ' @title SparkDataFrame statistic functions
23+
24+ # ' @description
25+ # ' crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency
2526# ' table. The number of distinct values for each column should be less than 1e4. At most 1e6
2627# ' non-zero pair frequencies will be returned.
2728# '
@@ -49,16 +50,14 @@ setMethod("crosstab",
4950 collect(dataFrame(sct ))
5051 })
5152
52- # ' cov
53- # '
5453# ' Calculates the sample covariance of two numerical columns of a SparkDataFrame.
5554# '
5655# ' @param x A SparkDataFrame
5756# ' @param col1 the name of the first column
5857# ' @param col2 the name of the second column
5958# ' @return the covariance of the two columns.
6059# '
61- # ' @rdname statfunctions
60+ # ' @rdname cov
6261# ' @name cov
6362# ' @export
6463# ' @examples
@@ -75,8 +74,6 @@ setMethod("cov",
7574 callJMethod(statFunctions , " cov" , col1 , col2 )
7675 })
7776
78- # ' corr
79- # '
8077# ' Calculates the correlation of two columns of a SparkDataFrame.
8178# ' Currently only supports the Pearson Correlation Coefficient.
8279# ' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
@@ -88,7 +85,7 @@ setMethod("cov",
8885# ' only "pearson" is allowed now.
8986# ' @return The Pearson Correlation Coefficient as a Double.
9087# '
91- # ' @rdname statfunctions
88+ # ' @rdname corr
9289# ' @name corr
9390# ' @export
9491# ' @examples
@@ -106,9 +103,8 @@ setMethod("corr",
106103 callJMethod(statFunctions , " corr" , col1 , col2 , method )
107104 })
108105
109- # ' freqItems
110- # '
111- # ' Finding frequent items for columns, possibly with false positives.
106+ # ' @description
107+ # ' freqItems - Finds frequent items for columns, possibly with false positives.
112108# ' Using the frequent element count algorithm described in
113109# ' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
114110# '
@@ -134,10 +130,8 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
134130 collect(dataFrame(sct ))
135131 })
136132
137- # ' approxQuantile
138- # '
139- # ' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
140- # '
133+ # ' @description
134+ # ' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
141135# ' The result of this algorithm has the following deterministic bound:
142136# ' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
143137# ' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
@@ -174,9 +168,9 @@ setMethod("approxQuantile",
174168 as.list(probabilities ), relativeError )
175169 })
176170
177- # ' sampleBy
178- # '
179- # ' Returns a stratified sample without replacement based on the fraction given on each stratum.
171+ # ' @description
172+ # ' sampleBy - Returns a stratified sample without replacement based on the fraction given on each
173+ # ' stratum.
180174# '
181175# ' @param x A SparkDataFrame
182176# ' @param col column that defines strata
0 commit comments