8 changes: 4 additions & 4 deletions R/pkg/R/generics.R
@@ -430,19 +430,19 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
#' @export
setGeneric("columns", function(x) {standardGeneric("columns") })

-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("cov", function(x, ...) {standardGeneric("cov") })

-#' @rdname statfunctions
+#' @rdname corr
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })

-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })

-#' @rdname statfunctions
+#' @rdname covar_pop
#' @export
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
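To see why the `@rdname` changes above fix the generated pages: roxygen2 merges every documentation block that shares an `@rdname` into a single `.Rd` topic and concatenates their titles and descriptions, which is how all of the stat functions ended up on one `statfunctions` page with a run-on title. A minimal sketch of the two behaviors (hypothetical blocks, not taken from this diff):

```r
# Shared topic: this block is merged into statfunctions.Rd together with
# every other block tagged @rdname statfunctions, and the generated page
# carries all of their titles concatenated into one string.
#' foo
#' @rdname statfunctions
#' @export
setGeneric("foo", function(x) { standardGeneric("foo") })

# Dedicated topic: this block renders into its own cov.Rd, so the page
# has exactly one title and the index entry stays short.
#' cov
#' @rdname cov
#' @export
setGeneric("cov", function(x, ...) { standardGeneric("cov") })
```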

32 changes: 13 additions & 19 deletions R/pkg/R/stats.R
@@ -19,9 +19,10 @@

setOldClass("jobj")

-#' crosstab
-#'
-#' Computes a pair-wise frequency table of the given columns. Also known as a contingency
+#' @title SparkDataFrame statistic functions
Member:
Hi, @felixcheung. When I use ./create-docs.sh, this breaks the page. Should I do something differently to generate the HTML page like yours?

Member Author:
What's the error you see? This works for me, and the Jenkins run creates the docs too.

Member:
For me, the generated HTML file, file:///Users/dongjoon/spark/R/pkg/html/statfunctions.html, has the following title:

    SparkDataFrame statistic functions crosstab - Computes a pair-wise frequency table of the given columns Computes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned.

Also, index.html shows the above long string for all the stat functions, like approxQuantile.

Member:
It's a little bit different from your screenshot (After).

Contributor:
I don't see the build error, but the page title doesn't come up right. A screenshot is at https://www.dropbox.com/s/sc1mrd7upr6t7mp/Screenshot%202016-06-20%2021.25.57.png?dl=0

Also, we seem to have some functions, like covar_samp and covar_pop, that don't have a description?

Member Author (@felixcheung, Jun 21, 2016):
Fixed, sorry about that. roxygen2 is a bit... stubborn.

[image]

cov and corr shouldn't be there; they are referenced in generics.R. These bugs are also fixed in my other PR #13798; there are quite a lot of them.


+#' @description
+#' crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency
#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
#' non-zero pair frequencies will be returned.
#'
@@ -49,16 +50,14 @@ setMethod("crosstab",
collect(dataFrame(sct))
})

-#' cov
-#'
#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
#'
#' @param x A SparkDataFrame
#' @param col1 the name of the first column
#' @param col2 the name of the second column
#' @return the covariance of the two columns.
#'
-#' @rdname statfunctions
+#' @rdname cov
#' @name cov
#' @export
#' @examples
@@ -75,8 +74,6 @@ setMethod("cov",
callJMethod(statFunctions, "cov", col1, col2)
})

-#' corr
-#'
#' Calculates the correlation of two columns of a SparkDataFrame.
#' Currently only supports the Pearson Correlation Coefficient.
#' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
@@ -88,7 +85,7 @@ setMethod("cov",
#' only "pearson" is allowed now.
#' @return The Pearson Correlation Coefficient as a Double.
#'
-#' @rdname statfunctions
+#' @rdname corr
#' @name corr
#' @export
#' @examples
@@ -106,9 +103,8 @@ setMethod("corr",
callJMethod(statFunctions, "corr", col1, col2, method)
})

-#' freqItems
-#'
-#' Finding frequent items for columns, possibly with false positives.
+#' @description
+#' freqItems - Finding frequent items for columns, possibly with false positives.
#' Using the frequent element count algorithm described in
#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
#'
@@ -134,10 +130,8 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
collect(dataFrame(sct))
})

-#' approxQuantile
-#'
-#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
-#'
+#' @description
+#' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
Member:
Unfortunately, this line is ignored. We need @description here, too:

    #' @description
    #' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.

After adding that, the description depth will look different. I mean, only approxQuantile has a detailed description, like the following:

    crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned.

    freqItems - Finding frequent items for columns, possibly with false positives. Using the frequent element count algorithm described in http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou.

    approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
    The result of this algorithm has the following deterministic bound: If the SparkDataFrame has N elements and if we request the quantile at probability 'p' up to error 'err', then the algorithm will return a sample 'x' from the SparkDataFrame so that the *exact* rank of 'x' is close to (p * N). More precisely, floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
    This method implements a variation of the Greenwald-Khanna algorithm (with some speed optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna.

    sampleBy - Returns a stratified sample without replacement based on the fraction given on each stratum.

I'm not sure about balancing them by removing "The result of this algorithm ~~~ Khanna." If you think that is okay, we can keep it.

Member:
Oh, @shivaram already mentioned this. +1 for @shivaram's opinion. If we delete the details, @description is not needed.

Member:
Except the above, LGTM!

Member:
@felixcheung, I found that @description and @details work for me:

    #' @description
    #' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
    #'
    #' @details
    #' The result o~~

If we keep the description, please try the above pair.

Member Author:
Fixed, sorry I missed one approxQuantile. Thanks for testing it out.
I thought a completely arbitrary do-not-have-a-newline convention would be hard for someone new to follow, so I added @description instead, to make it clear in case someone copies and pastes the block and adds a new empty line.
#' The result of this algorithm has the following deterministic bound:
#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
@@ -174,9 +168,9 @@ setMethod("approxQuantile",
as.list(probabilities), relativeError)
})
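Written out in full, the @description/@details pairing the reviewers settle on would look roughly like this for approxQuantile (a sketch assembled from the hunks and comments above; the tag placement is the point, and the details text is abridged):

```r
#' @description
#' approxQuantile - Calculates the approximate quantiles of a numerical
#' column of a SparkDataFrame.
#'
#' @details
#' The result of this algorithm has the following deterministic bound:
#' if the SparkDataFrame has N elements and we request the quantile at
#' probability `p` up to error `err`, then the algorithm will return a
#' sample `x` from the SparkDataFrame so that the *exact* rank of `x`
#' is close to (p * N).
```

With this split, roxygen2 keeps the one-line summary in the Rd description section and moves the bound into a separate details section, so the concatenated descriptions on the shared page stay balanced in length.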

-#' sampleBy
-#'
-#' Returns a stratified sample without replacement based on the fraction given on each stratum.
+#' @description
+#' sampleBy - Returns a stratified sample without replacement based on the fraction given on each
+#' stratum.
#'
#' @param x A SparkDataFrame
#' @param col column that defines strata