1717
1818package org .apache .spark .sql
1919
20+ import java .lang .{String => JavaString }
21+ import java .util .{List => JavaList }
22+
23+ import scala .collection .JavaConversions ._
24+
2025import org .apache .spark .annotation .Experimental
2126import org .apache .spark .sql .execution .stat .FrequentItems
2227
@@ -31,9 +36,10 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
3136 * Finding frequent items for columns, possibly with false positives. Using the
3237 * frequent element count algorithm described in
3338 * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou ]].
39+ * The `support` should be greater than 1e-4.
3440 *
35- * @param cols the names of the columns to search frequent items in
36- * @param support The minimum frequency for an item to be considered `frequent` Should be greater
41+ * @param cols the names of the columns to search frequent items in.
42+ * @param support The minimum frequency for an item to be considered `frequent`. Should be greater
3743 * than 1e-4.
3844 * @return A Local DataFrame with the Array of frequent items for each column.
3945 */
@@ -47,7 +53,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
4753 * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou ]].
4854 * Returns items more frequent than 1 percent.
4955 *
50- * @param cols the names of the columns to search frequent items in
56+ * @param cols the names of the columns to search frequent items in.
5157 * @return A Local DataFrame with the Array of frequent items for each column.
5258 */
5359 def freqItems (cols : Seq [String ]): DataFrame = {
@@ -58,13 +64,14 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
5864 * Finding frequent items for columns, possibly with false positives. Using the
5965 * frequent element count algorithm described in
6066 * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou ]].
67+ * The `support` should be greater than 1e-4.
6168 *
62- * @param cols the names of the columns to search frequent items in
63- * @param support The minimum frequency for an item to be considered `frequent` Should be greater
69+ * @param cols the names of the columns to search frequent items in.
70+ * @param support The minimum frequency for an item to be considered `frequent`. Should be greater
6471 * than 1e-4.
6572 * @return A Local DataFrame with the Array of frequent items for each column.
6673 */
def freqItems(cols: JavaList[JavaString], support: Double): DataFrame = {
  // Java-friendly overload: callers hand in a java.util.List of column names.
  // Convert it explicitly with `.asScala` instead of relying on the wildcard
  // `JavaConversions` implicits, so the Java -> Scala boundary is visible at the
  // call site. `.asScala` wraps the list (no copy) as a mutable.Buffer, the same
  // wrapper type the implicit `asScalaBuffer` conversion would have produced.
  import scala.collection.JavaConverters._

  // `support` is forwarded unchanged; the surrounding docs ask for > 1e-4.
  FrequentItems.singlePassFreqItems(df, cols.asScala, support)
}
7077
@@ -74,10 +81,10 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
7481 * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou ]].
7582 * Returns items more frequent than 1 percent of the time.
7683 *
77- * @param cols the names of the columns to search frequent items in
84+ * @param cols the names of the columns to search frequent items in.
7885 * @return A Local DataFrame with the Array of frequent items for each column.
7986 */
def freqItems(cols: JavaList[JavaString]): DataFrame = {
  // Java-friendly overload using the default support threshold of 0.01 (1 percent).
  // Convert the java.util.List explicitly with `.asScala` instead of relying on
  // the wildcard `JavaConversions` implicits, so the Java -> Scala boundary is
  // visible at the call site. `.asScala` wraps the list without copying it.
  import scala.collection.JavaConverters._

  FrequentItems.singlePassFreqItems(df, cols.asScala, 0.01)
}
8390}
0 commit comments