@@ -28,7 +28,9 @@ import org.apache.spark.rdd.RDD
2828 * Regression model for isotonic regression.
2929 *
3030 * @param boundaries Array of boundaries for which predictions are known.
31+ * Boundaries must be sorted in increasing order.
3132 * @param predictions Array of predictions associated to the boundaries at the same index.
33+ * Results of isotonic regression and therefore monotone.
3234 */
3335class IsotonicRegressionModel (
3436 boundaries : Array [Double ],
@@ -75,67 +77,68 @@ class IsotonicRegressionModel (
7577 *
7678 * @param testData Feature to be labeled.
7779 * @return Predicted label.
78- * If testData exactly matches a boundary then associated prediction is directly returned
79- * If testData is lower or higher than all boundaries
80- * then first or last prediction is returned respectively
81- * If testData falls between two values in boundary then predictions is treated
82- * as piecewise linear function and interpolated value is returned
80+ * If testData exactly matches a boundary then associated prediction is directly returned.
81+ * If testData is lower or higher than all boundaries
82+ * then first or last prediction is returned respectively.
83+ * If testData falls between two values in boundary array then the predictions are treated
84+ * as a piecewise linear function and the interpolated value is returned.
8385 */
8486 def predict (testData : Double ): Double = {
8587
8688 def linearInterpolation (x1 : Double , y1 : Double , x2 : Double , y2 : Double , x : Double ): Double = {
8789 y1 + (y2 - y1) * (x - x1) / (x2 - x1)
8890 }
8991
90- val insertIndex = binarySearch(boundaries, testData)
91-
92- val normalisedInsertIndex = - insertIndex - 1
92+ val foundIndex = binarySearch(boundaries, testData)
93+ val insertIndex = - foundIndex - 1
9394
9495 // Find if the index was lower than all values,
95- // higher than all values, inbetween two values or exact match.
96- if (insertIndex == - 1 ) {
96+ // higher than all values, in between two values or exact match.
97+ if (insertIndex == 0 ) {
9798 predictions.head
98- } else if (normalisedInsertIndex == boundaries.length){
99+ } else if (insertIndex == boundaries.length){
99100 predictions.last
100- } else if (insertIndex < 0 ) {
101+ } else if (foundIndex < 0 ) {
101102 linearInterpolation(
102- boundaries(normalisedInsertIndex - 1 ),
103- predictions(normalisedInsertIndex - 1 ),
104- boundaries(normalisedInsertIndex ),
105- predictions(normalisedInsertIndex ),
103+ boundaries(insertIndex - 1 ),
104+ predictions(insertIndex - 1 ),
105+ boundaries(insertIndex ),
106+ predictions(insertIndex ),
106107 testData)
107108 } else {
108- predictions(insertIndex )
109+ predictions(foundIndex )
109110 }
110111 }
111112}
112113
113114/**
114115 * Isotonic regression.
115116 * Currently implemented using parallelized pool adjacent violators algorithm.
116- * Currently only univariate (single feature) algorithm supported.
117+ * Only the univariate (single feature) algorithm is supported.
117118 *
118119 * Sequential PAV implementation based on:
119120 * Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.
120121 * "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61.
122+ * Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf
121123 *
122- * Sequential PAV parallelized as per :
124+ * Sequential PAV parallelization based on :
123125 * Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset.
124126 * "An approach to parallelizing isotonic regression."
125127 * Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
128+ * Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf
126129 */
127130class IsotonicRegression private (private var isotonic : Boolean ) extends Serializable {
128131
129132 /**
130- * Constructs IsotonicRegression instance with default parameter isotonic = true
131- * @return New instance of IsotonicRegression
133+ * Constructs IsotonicRegression instance with default parameter isotonic = true.
134+ * @return New instance of IsotonicRegression.
132135 */
133136 def this () = this (true )
134137
135138 /**
136- * Sets the isotonic parameter
139+ * Sets the isotonic parameter.
137140 * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
138- * @return The instance of IsotonicRegression
141+ * @return This instance of IsotonicRegression.
139142 */
140143 def setIsotonic (isotonic : Boolean ): this .type = {
141144 this .isotonic = isotonic
@@ -148,7 +151,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
148151 * @param input RDD of tuples (label, feature, weight) where label is dependent variable
149152 * for which we calculate isotonic regression, feature is independent variable
150153 * and weight represents number of measures with default 1.
151- *
152154 * @return Isotonic regression model.
153155 */
154156 def run (input : RDD [(Double , Double , Double )]): IsotonicRegressionModel = {
@@ -186,7 +188,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
186188 /**
187189 * Performs a pool adjacent violators algorithm (PAV).
188190 * Uses approach with single processing of data where violators
189- * in previously processed data created by pooling are fixed immediatelly .
191+ * in previously processed data created by pooling are fixed immediately .
190192 * Uses optimization of discovering monotonicity violating sequences (blocks).
191193 *
192194 * @param input Input data of tuples (label, feature, weight).
0 commit comments