@@ -32,19 +32,21 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
3232 * `Bucketizer` maps a column of continuous features to a column of feature buckets.
3333 */
3434@ AlphaComponent
35- private [ml] final class Bucketizer (override val parent : Estimator [Bucketizer ])
35+ final class Bucketizer private [ml] (override val parent : Estimator [Bucketizer ])
3636 extends Model [Bucketizer ] with HasInputCol with HasOutputCol {
3737
3838 def this () = this (null )
3939
4040 /**
4141 * Parameter for mapping continuous features into buckets. With n splits, there are n+1 buckets.
42- * A bucket defined by splits x,y holds values in the range [x,y).
42+ * A bucket defined by splits x,y holds values in the range [x,y). Note that the splits should be
43+ * strictly increasing.
4344 * @group param
4445 */
4546 val splits : Param [Array [Double ]] = new Param [Array [Double ]](this , " splits" ,
46- " Split points for mapping continuous features into buckets. With n splits, there are n+1" +
47- " buckets. A bucket defined by splits x,y holds values in the range [x,y)." ,
47+ " Split points for mapping continuous features into buckets. With n splits, there are n+1 " +
48+ " buckets. A bucket defined by splits x,y holds values in the range [x,y). The splits " +
49+ " should be strictly increasing." ,
4850 Bucketizer .checkSplits)
4951
5052 /** @group getParam */
@@ -53,9 +55,15 @@ private[ml] final class Bucketizer(override val parent: Estimator[Bucketizer])
5355 /** @group setParam */
5456 def setSplits (value : Array [Double ]): this .type = set(splits, value)
5557
56- /** @group Param */
58+ /**
59+ * An indicator of the inclusiveness of negative infinity. If true, then use implicit bin
60+ * (-inf, getSplits.head). If false, then throw exception if values < getSplits.head are
61+ * encountered.
62+ * @group param */
5763 val lowerInclusive : BooleanParam = new BooleanParam (this , " lowerInclusive" ,
58- " An indicator of the inclusiveness of negative infinite." )
64+ " An indicator of the inclusiveness of negative infinite. If true, then use implicit bin " +
65+ " (-inf, getSplits.head). If false, then throw exception if values < getSplits.head are " +
66+ " encountered." )
5967 setDefault(lowerInclusive -> true )
6068
6169 /** @group getParam */
@@ -64,9 +72,15 @@ private[ml] final class Bucketizer(override val parent: Estimator[Bucketizer])
6472 /** @group setParam */
6573 def setLowerInclusive (value : Boolean ): this .type = set(lowerInclusive, value)
6674
67- /** @group Param */
75+ /**
76+ * An indicator of the inclusiveness of positive infinity. If true, then use implicit bin
77+ * [getSplits.last, inf). If false, then throw exception if values > getSplits.last are
78+ * encountered.
79+ * @group param */
6880 val upperInclusive : BooleanParam = new BooleanParam (this , " upperInclusive" ,
69- " An indicator of the inclusiveness of positive infinite." )
81+ " An indicator of the inclusiveness of positive infinite. If true, then use implicit bin " +
82+ " [getSplits.last, inf). If false, then throw exception if values > getSplits.last are " +
83+ " encountered." )
7084 setDefault(upperInclusive -> true )
7185
7286 /** @group getParam */
@@ -93,9 +107,7 @@ private[ml] final class Bucketizer(override val parent: Estimator[Bucketizer])
93107 }
94108
95109 private def prepOutputField (schema : StructType ): StructField = {
96- val attr = new NominalAttribute (
97- name = Some ($(outputCol)),
98- isOrdinal = Some (true ),
110+ val attr = new NominalAttribute (name = Some ($(outputCol)), isOrdinal = Some (true ),
99111 values = Some ($(splits).map(_.toString)))
100112
101113 attr.toStructField()
@@ -109,7 +121,7 @@ private[ml] final class Bucketizer(override val parent: Estimator[Bucketizer])
109121 }
110122}
111123
112- object Bucketizer {
124+ private [feature] object Bucketizer {
113125 /**
114126 * The given splits should match 1) its size is larger than zero; 2) it is ordered in a strictly
115127 * increasing way.
@@ -137,8 +149,8 @@ object Bucketizer {
137149 lowerInclusive : Boolean ,
138150 upperInclusive : Boolean ): Double = {
139151 if ((feature < splits.head && ! lowerInclusive) || (feature > splits.last && ! upperInclusive)) {
140- throw new Exception (s " Feature $feature out of bound, check your features or loose the " +
141- s " lower/upper bound constraint. " )
152+ throw new RuntimeException (s " Feature $feature out of bound, check your features or loosen " +
153+ s " the lower/upper bound constraint." )
142154 }
143155 var left = 0
144156 var right = splits.length - 2
@@ -153,6 +165,6 @@ object Bucketizer {
153165 left = mid + 1
154166 }
155167 }
156- throw new Exception (s " Failed to find a bucket for feature $feature. " )
168+ throw new RuntimeException (s " Unexpected error: failed to find a bucket for feature $feature. " )
157169 }
158170}
0 commit comments