@@ -169,7 +169,7 @@ def test_kmeans_deterministic(self):
169169
170170 def test_classification (self ):
171171 from pyspark .mllib .classification import LogisticRegressionWithSGD , SVMWithSGD , NaiveBayes
172- from pyspark .mllib .tree import DecisionTree
172+ from pyspark .mllib .tree import DecisionTree , RandomForest , GradientBoostedTrees
173173 data = [
174174 LabeledPoint (0.0 , [1 , 0 , 0 ]),
175175 LabeledPoint (1.0 , [0 , 1 , 1 ]),
@@ -198,18 +198,31 @@ def test_classification(self):
198198 self .assertTrue (nb_model .predict (features [3 ]) > 0 )
199199
200200 categoricalFeaturesInfo = {0 : 3 } # feature 0 has 3 categories
201- dt_model = \
202- DecisionTree .trainClassifier (rdd , numClasses = 2 ,
203- categoricalFeaturesInfo = categoricalFeaturesInfo )
201+ dt_model = DecisionTree .trainClassifier (
202+ rdd , numClasses = 2 , categoricalFeaturesInfo = categoricalFeaturesInfo )
204203 self .assertTrue (dt_model .predict (features [0 ]) <= 0 )
205204 self .assertTrue (dt_model .predict (features [1 ]) > 0 )
206205 self .assertTrue (dt_model .predict (features [2 ]) <= 0 )
207206 self .assertTrue (dt_model .predict (features [3 ]) > 0 )
208207
208+ rf_model = RandomForest .trainClassifier (
209+ rdd , numClasses = 2 , categoricalFeaturesInfo = categoricalFeaturesInfo , numTrees = 100 )
210+ self .assertTrue (rf_model .predict (features [0 ]) <= 0 )
211+ self .assertTrue (rf_model .predict (features [1 ]) > 0 )
212+ self .assertTrue (rf_model .predict (features [2 ]) <= 0 )
213+ self .assertTrue (rf_model .predict (features [3 ]) > 0 )
214+
215+ gbt_model = GradientBoostedTrees .trainClassifier (
216+ rdd , categoricalFeaturesInfo = categoricalFeaturesInfo )
217+ self .assertTrue (gbt_model .predict (features [0 ]) <= 0 )
218+ self .assertTrue (gbt_model .predict (features [1 ]) > 0 )
219+ self .assertTrue (gbt_model .predict (features [2 ]) <= 0 )
220+ self .assertTrue (gbt_model .predict (features [3 ]) > 0 )
221+
209222 def test_regression (self ):
210223 from pyspark .mllib .regression import LinearRegressionWithSGD , LassoWithSGD , \
211224 RidgeRegressionWithSGD
212- from pyspark .mllib .tree import DecisionTree
225+ from pyspark .mllib .tree import DecisionTree , RandomForest , GradientBoostedTrees
213226 data = [
214227 LabeledPoint (- 1.0 , [0 , - 1 ]),
215228 LabeledPoint (1.0 , [0 , 1 ]),
@@ -238,13 +251,27 @@ def test_regression(self):
238251 self .assertTrue (rr_model .predict (features [3 ]) > 0 )
239252
240253 categoricalFeaturesInfo = {0 : 2 } # feature 0 has 2 categories
241- dt_model = \
242- DecisionTree . trainRegressor ( rdd , categoricalFeaturesInfo = categoricalFeaturesInfo )
254+ dt_model = DecisionTree . trainRegressor (
255+ rdd , categoricalFeaturesInfo = categoricalFeaturesInfo )
243256 self .assertTrue (dt_model .predict (features [0 ]) <= 0 )
244257 self .assertTrue (dt_model .predict (features [1 ]) > 0 )
245258 self .assertTrue (dt_model .predict (features [2 ]) <= 0 )
246259 self .assertTrue (dt_model .predict (features [3 ]) > 0 )
247260
261+ rf_model = RandomForest .trainRegressor (
262+ rdd , categoricalFeaturesInfo = categoricalFeaturesInfo , numTrees = 100 )
263+ self .assertTrue (rf_model .predict (features [0 ]) <= 0 )
264+ self .assertTrue (rf_model .predict (features [1 ]) > 0 )
265+ self .assertTrue (rf_model .predict (features [2 ]) <= 0 )
266+ self .assertTrue (rf_model .predict (features [3 ]) > 0 )
267+
268+ gbt_model = GradientBoostedTrees .trainRegressor (
269+ rdd , categoricalFeaturesInfo = categoricalFeaturesInfo )
270+ self .assertTrue (gbt_model .predict (features [0 ]) <= 0 )
271+ self .assertTrue (gbt_model .predict (features [1 ]) > 0 )
272+ self .assertTrue (gbt_model .predict (features [2 ]) <= 0 )
273+ self .assertTrue (gbt_model .predict (features [3 ]) > 0 )
274+
248275
249276class StatTests (PySparkTestCase ):
250277 # SPARK-4023
0 commit comments