From 17f99d42db3db9209506d9f27a4bbb995dffd637 Mon Sep 17 00:00:00 2001 From: Louiszr Date: Sun, 23 Aug 2020 18:34:09 +0100 Subject: [PATCH 1/2] Removed foldCol related code --- python/pyspark/ml/tests/test_tuning.py | 3 --- python/pyspark/ml/tuning.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740f8c4f1..0001021020192 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ def test_copy(self): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ def test_copy(self): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -224,7 +222,6 @@ def test_save_load_trained_model(self): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef24d021..9e8da968a1d65 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -511,7 +511,6 @@ def _from_java(cls, java_stage): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), - "foldCol": java_stage.getFoldCol(), "seed": java_stage.getSeed(), } for param_name, param_val in params.items(): @@ -544,7 +543,6 @@ def _to_java(self): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": self.getNumFolds(), - "foldCol": self.getFoldCol(), "seed": self.getSeed(), } for param_name, param_val in params.items(): From 989650da057cf624543d7ecb36a8b9d178c3d7fe Mon Sep 17 00:00:00 2001 From: Louiszr Date: Sun, 23 Aug 2020 23:01:40 +0100 Subject: [PATCH 2/2] Fixed copy() to copy models instead of list --- python/pyspark/ml/tests/test_tuning.py | 8 ++++---- python/pyspark/ml/tuning.py | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index 0001021020192..b1acaf695245d 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -125,9 +125,9 @@ def test_copy(self): 'foo', "Changing the original avgMetrics should not affect the copied model" ) - cvModel.subModels[0] = 'foo' + cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( - cvModelCopied.subModels[0], + cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -777,9 +777,9 @@ def test_copy(self): 'foo', "Changing the original validationMetrics should not affect the copied model" ) - tvsModel.subModels[0] = 'foo' + tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( - tvsModelCopied.subModels[0], + tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 9e8da968a1d65..6283c8bfd3df9 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ def copy(self, extra=None): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) - subModels = [model.copy() for model in self.subModels] + subModels = [ + [sub_model.copy() for sub_model in fold_sub_models] + for fold_sub_models in self.subModels + ] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0")