From 3598c9fb006f998abddf531d188b401ddb05cbf2 Mon Sep 17 00:00:00 2001 From: sethah Date: Wed, 23 Mar 2016 16:59:07 -0700 Subject: [PATCH 1/7] using _set in all params --- python/pyspark/ml/classification.py | 16 +++---- python/pyspark/ml/clustering.py | 10 ++--- python/pyspark/ml/evaluation.py | 16 ++++--- python/pyspark/ml/feature.py | 68 ++++++++++++++--------------- python/pyspark/ml/param/__init__.py | 4 +- python/pyspark/ml/recommendation.py | 22 +++++----- python/pyspark/ml/regression.py | 20 ++++----- python/pyspark/ml/tuning.py | 4 +- 8 files changed, 81 insertions(+), 79 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 922f8069fac4..59215e84c9bb 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -142,7 +142,7 @@ def setThreshold(self, value): Sets the value of :py:attr:`threshold`. Clears value of :py:attr:`thresholds` if it has been set. """ - self._paramMap[self.threshold] = value + self._set(threshold=value) if self.isSet(self.thresholds): del self._paramMap[self.thresholds] return self @@ -169,7 +169,7 @@ def setThresholds(self, value): Sets the value of :py:attr:`thresholds`. Clears value of :py:attr:`threshold` if it has been set. """ - self._paramMap[self.thresholds] = value + self._set(thresholds=value) if self.isSet(self.threshold): del self._paramMap[self.threshold] return self @@ -471,7 +471,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self._paramMap[self.impurity] = value + self._set(impurity=value) return self @since("1.6.0") @@ -820,7 +820,7 @@ def setLossType(self, value): """ Sets the value of :py:attr:`lossType`. """ - self._paramMap[self.lossType] = value + self._set(lossType=value) return self @since("1.4.0") @@ -950,7 +950,7 @@ def setSmoothing(self, value): """ Sets the value of :py:attr:`smoothing`. """ - self._paramMap[self.smoothing] = value + self._set(smoothing=value) return self @since("1.5.0") @@ -965,7 +965,7 @@ def setModelType(self, value): """ Sets the value of :py:attr:`modelType`. """ - self._paramMap[self.modelType] = value + self._set(modelType=value) return self @since("1.5.0") @@ -1095,7 +1095,7 @@ def setLayers(self, value): """ Sets the value of :py:attr:`layers`. """ - self._paramMap[self.layers] = value + self._set(layers=value) return self @since("1.6.0") @@ -1110,7 +1110,7 @@ def setBlockSize(self, value): """ Sets the value of :py:attr:`blockSize`. """ - self._paramMap[self.blockSize] = value + self._set(blockSize=value) return self @since("1.6.0") diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index f071c597c87f..64c4bf1b927d 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -130,7 +130,7 @@ def setK(self, value): """ Sets the value of :py:attr:`k`. """ - self._paramMap[self.k] = value + self._set(k=value) return self @since("1.5.0") @@ -145,7 +145,7 @@ def setInitMode(self, value): """ Sets the value of :py:attr:`initMode`. """ - self._paramMap[self.initMode] = value + self._set(initMode=value) return self @since("1.5.0") @@ -160,7 +160,7 @@ def setInitSteps(self, value): """ Sets the value of :py:attr:`initSteps`. """ - self._paramMap[self.initSteps] = value + self._set(initSteps=value) return self @since("1.5.0") @@ -280,7 +280,7 @@ def setK(self, value): """ Sets the value of :py:attr:`k`. """ - self._paramMap[self.k] = value + self._set(k=value) return self @since("2.0.0") @@ -295,7 +295,7 @@ def setMinDivisibleClusterSize(self, value): """ Sets the value of :py:attr:`minDivisibleClusterSize`. """ - self._paramMap[self.minDivisibleClusterSize] = value + self._set(minDivisibleClusterSize=value) return self @since("2.0.0") diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 4b0bade10280..129701c4e6c1 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -19,7 +19,7 @@ from pyspark import since from pyspark.ml.wrapper import JavaParams -from pyspark.ml.param import Param, Params +from pyspark.ml.param import Param, Params, TypeConverters from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol from pyspark.ml.util import keyword_only from pyspark.mllib.common import inherit_doc @@ -125,7 +125,8 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction """ metricName = Param(Params._dummy(), "metricName", - "metric name in evaluation (areaUnderROC|areaUnderPR)") + "metric name in evaluation (areaUnderROC|areaUnderPR)", + TypeConverters.toString) @keyword_only def __init__(self, rawPredictionCol="rawPrediction", labelCol="label", @@ -147,7 +148,7 @@ def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. """ - self._paramMap[self.metricName] = value + self._set(metricName=value) return self @since("1.4.0") @@ -194,7 +195,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): # when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`), # we take and output the negative of this metric. metricName = Param(Params._dummy(), "metricName", - "metric name in evaluation (mse|rmse|r2|mae)") + "metric name in evaluation (mse|rmse|r2|mae)", TypeConverters.toString) @keyword_only def __init__(self, predictionCol="prediction", labelCol="label", @@ -216,7 +217,7 @@ def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. """ - self._paramMap[self.metricName] = value + self._set(metricName=value) return self @since("1.4.0") @@ -260,7 +261,8 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio """ metricName = Param(Params._dummy(), "metricName", "metric name in evaluation " - "(f1|precision|recall|weightedPrecision|weightedRecall)") + "(f1|precision|recall|weightedPrecision|weightedRecall)", + TypeConverters.toString) @keyword_only def __init__(self, predictionCol="prediction", labelCol="label", @@ -282,7 +284,7 @@ def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. """ - self._paramMap[self.metricName] = value + self._set(metricName=value) return self @since("1.5.0") diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 86b53285b5b0..0bdb78e7a73c 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -112,7 +112,7 @@ def setThreshold(self, value): """ Sets the value of :py:attr:`threshold`. """ - self._paramMap[self.threshold] = value + self._set(threshold=value) return self @since("1.4.0") @@ -188,7 +188,7 @@ def setSplits(self, value): """ Sets the value of :py:attr:`splits`. """ - self._paramMap[self.splits] = value + self._set(splits=value) return self @since("1.4.0") @@ -284,7 +284,7 @@ def setMinTF(self, value): """ Sets the value of :py:attr:`minTF`. """ - self._paramMap[self.minTF] = value + self._set(minTF=value) return self @since("1.6.0") @@ -299,7 +299,7 @@ def setMinDF(self, value): """ Sets the value of :py:attr:`minDF`. """ - self._paramMap[self.minDF] = value + self._set(minDF=value) return self @since("1.6.0") @@ -314,7 +314,7 @@ def setVocabSize(self, value): """ Sets the value of :py:attr:`vocabSize`. """ - self._paramMap[self.vocabSize] = value + self._set(vocabSize=value) return self @since("1.6.0") @@ -407,7 +407,7 @@ def setInverse(self, value): """ Sets the value of :py:attr:`inverse`. """ - self._paramMap[self.inverse] = value + self._set(inverse=value) return self @since("1.6.0") @@ -474,7 +474,7 @@ def setScalingVec(self, value): """ Sets the value of :py:attr:`scalingVec`. """ - self._paramMap[self.scalingVec] = value + self._set(scalingVec=value) return self @since("1.5.0") @@ -597,7 +597,7 @@ def setMinDocFreq(self, value): """ Sets the value of :py:attr:`minDocFreq`. """ - self._paramMap[self.minDocFreq] = value + self._set(minDocFreq=value) return self @since("1.4.0") @@ -782,7 +782,7 @@ def setMin(self, value): """ Sets the value of :py:attr:`min`. """ - self._paramMap[self.min] = value + self._set(min=value) return self @since("1.6.0") @@ -797,7 +797,7 @@ def setMax(self, value): """ Sets the value of :py:attr:`max`. """ - self._paramMap[self.max] = value + self._set(max=value) return self @since("1.6.0") @@ -906,7 +906,7 @@ def setN(self, value): """ Sets the value of :py:attr:`n`. """ - self._paramMap[self.n] = value + self._set(n=value) return self @since("1.5.0") @@ -973,7 +973,7 @@ def setP(self, value): """ Sets the value of :py:attr:`p`. """ - self._paramMap[self.p] = value + self._set(p=value) return self @since("1.4.0") @@ -1056,7 +1056,7 @@ def setDropLast(self, value): """ Sets the value of :py:attr:`dropLast`. """ - self._paramMap[self.dropLast] = value + self._set(dropLast=value) return self @since("1.4.0") @@ -1125,7 +1125,7 @@ def setDegree(self, value): """ Sets the value of :py:attr:`degree`. """ - self._paramMap[self.degree] = value + self._set(degree=value) return self @since("1.4.0") @@ -1207,7 +1207,7 @@ def setNumBuckets(self, value): """ Sets the value of :py:attr:`numBuckets`. """ - self._paramMap[self.numBuckets] = value + self._set(numBuckets=value) return self @since("2.0.0") @@ -1305,7 +1305,7 @@ def setMinTokenLength(self, value): """ Sets the value of :py:attr:`minTokenLength`. """ - self._paramMap[self.minTokenLength] = value + self._set(minTokenLength=value) return self @since("1.4.0") @@ -1320,7 +1320,7 @@ def setGaps(self, value): """ Sets the value of :py:attr:`gaps`. """ - self._paramMap[self.gaps] = value + self._set(gaps=value) return self @since("1.4.0") @@ -1335,7 +1335,7 @@ def setPattern(self, value): """ Sets the value of :py:attr:`pattern`. """ - self._paramMap[self.pattern] = value + self._set(pattern=value) return self @since("1.4.0") @@ -1350,7 +1350,7 @@ def setToLowercase(self, value): """ Sets the value of :py:attr:`toLowercase`. """ - self._paramMap[self.toLowercase] = value + self._set(toLowercase=value) return self @since("2.0.0") @@ -1411,7 +1411,7 @@ def setStatement(self, value): """ Sets the value of :py:attr:`statement`. """ - self._paramMap[self.statement] = value + self._set(statement=value) return self @since("1.6.0") @@ -1488,7 +1488,7 @@ def setWithMean(self, value): """ Sets the value of :py:attr:`withMean`. """ - self._paramMap[self.withMean] = value + self._set(withMean=value) return self @since("1.4.0") @@ -1503,7 +1503,7 @@ def setWithStd(self, value): """ Sets the value of :py:attr:`withStd`. """ - self._paramMap[self.withStd] = value + self._set(withStd=value) return self @since("1.4.0") @@ -1672,7 +1672,7 @@ def setLabels(self, value): """ Sets the value of :py:attr:`labels`. """ - self._paramMap[self.labels] = value + self._set(labels=value) return self @since("1.6.0") @@ -1743,7 +1743,7 @@ def setStopWords(self, value): """ Specify the stopwords to be filtered. """ - self._paramMap[self.stopWords] = value + self._set(stopWords=value) return self @since("1.6.0") @@ -1758,7 +1758,7 @@ def setCaseSensitive(self, value): """ Set whether to do a case sensitive comparison over the stop words """ - self._paramMap[self.caseSensitive] = value + self._set(caseSensitive=value) return self @since("1.6.0") @@ -1975,7 +1975,7 @@ def setMaxCategories(self, value): """ Sets the value of :py:attr:`maxCategories`. """ - self._paramMap[self.maxCategories] = value + self._set(maxCategories=value) return self @since("1.4.0") @@ -2085,7 +2085,7 @@ def setIndices(self, value): """ Sets the value of :py:attr:`indices`. """ - self._paramMap[self.indices] = value + self._set(indices=value) return self @since("1.6.0") @@ -2100,7 +2100,7 @@ def setNames(self, value): """ Sets the value of :py:attr:`names`. """ - self._paramMap[self.names] = value + self._set(names=value) return self @since("1.6.0") @@ -2205,7 +2205,7 @@ def setVectorSize(self, value): """ Sets the value of :py:attr:`vectorSize`. """ - self._paramMap[self.vectorSize] = value + self._set(vectorSize=value) return self @since("1.4.0") @@ -2220,7 +2220,7 @@ def setNumPartitions(self, value): """ Sets the value of :py:attr:`numPartitions`. """ - self._paramMap[self.numPartitions] = value + self._set(numPartitions=value) return self @since("1.4.0") @@ -2235,7 +2235,7 @@ def setMinCount(self, value): """ Sets the value of :py:attr:`minCount`. """ - self._paramMap[self.minCount] = value + self._set(minCount=value) return self @since("1.4.0") @@ -2341,7 +2341,7 @@ def setK(self, value): """ Sets the value of :py:attr:`k`. """ - self._paramMap[self.k] = value + self._set(k=value) return self @since("1.5.0") @@ -2473,7 +2473,7 @@ def setFormula(self, value): """ Sets the value of :py:attr:`formula`. """ - self._paramMap[self.formula] = value + self._set(formula=value) return self @since("1.5.0") @@ -2565,7 +2565,7 @@ def setNumTopFeatures(self, value): """ Sets the value of :py:attr:`numTopFeatures`. """ - self._paramMap[self.numTopFeatures] = value + self._set(numTopFeatures=value) return self @since("2.0.0") diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index a1265294a1e9..9441ceb3447f 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -389,8 +389,8 @@ def copy(self, extra=None): if extra is None: extra = dict() that = copy.copy(self) - that._paramMap = self.extractParamMap(extra) - return that + that._paramMap = copy.copy(self._paramMap) + return self._copyValues(that, extra) def _shouldOwn(self, param): """ diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 7c7a1b67a100..9c38f2431b8d 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -157,7 +157,7 @@ def setRank(self, value): """ Sets the value of :py:attr:`rank`. """ - self._paramMap[self.rank] = value + self._set(rank=value) return self @since("1.4.0") @@ -172,7 +172,7 @@ def setNumUserBlocks(self, value): """ Sets the value of :py:attr:`numUserBlocks`. """ - self._paramMap[self.numUserBlocks] = value + self._set(numUserBlocks=value) return self @since("1.4.0") @@ -187,7 +187,7 @@ def setNumItemBlocks(self, value): """ Sets the value of :py:attr:`numItemBlocks`. """ - self._paramMap[self.numItemBlocks] = value + self._set(numItemBlocks=value) return self @since("1.4.0") @@ -202,15 +202,15 @@ def setNumBlocks(self, value): """ Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the specific value. """ - self._paramMap[self.numUserBlocks] = value - self._paramMap[self.numItemBlocks] = value + self._set(numUserBlocks=value) + self._set(numItemBlocks=value) @since("1.4.0") def setImplicitPrefs(self, value): """ Sets the value of :py:attr:`implicitPrefs`. """ - self._paramMap[self.implicitPrefs] = value + self._set(implicitPrefs=value) return self @since("1.4.0") @@ -225,7 +225,7 @@ def setAlpha(self, value): """ Sets the value of :py:attr:`alpha`. """ - self._paramMap[self.alpha] = value + self._set(alpha=value) return self @since("1.4.0") @@ -240,7 +240,7 @@ def setUserCol(self, value): """ Sets the value of :py:attr:`userCol`. """ - self._paramMap[self.userCol] = value + self._set(userCol=value) return self @since("1.4.0") @@ -255,7 +255,7 @@ def setItemCol(self, value): """ Sets the value of :py:attr:`itemCol`. """ - self._paramMap[self.itemCol] = value + self._set(itemCol=value) return self @since("1.4.0") @@ -270,7 +270,7 @@ def setRatingCol(self, value): """ Sets the value of :py:attr:`ratingCol`. """ - self._paramMap[self.ratingCol] = value + self._set(ratingCol=value) return self @since("1.4.0") @@ -285,7 +285,7 @@ def setNonnegative(self, value): """ Sets the value of :py:attr:`nonnegative`. """ - self._paramMap[self.nonnegative] = value + self._set(nonnegative=value) return self @since("1.4.0") diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 316d7e30bcf1..582758c61bbf 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -478,7 +478,7 @@ def setIsotonic(self, value): """ Sets the value of :py:attr:`isotonic`. """ - self._paramMap[self.isotonic] = value + self._set(isotonic=value) return self def getIsotonic(self): @@ -491,7 +491,7 @@ def setFeatureIndex(self, value): """ Sets the value of :py:attr:`featureIndex`. """ - self._paramMap[self.featureIndex] = value + self._set(featureIndex=value) return self def getFeatureIndex(self): @@ -541,7 +541,7 @@ def setSubsamplingRate(self, value): """ Sets the value of :py:attr:`subsamplingRate`. """ - self._paramMap[self.subsamplingRate] = value + self._set(subsamplingRate=value) return self @since("1.4.0") @@ -571,7 +571,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self._paramMap[self.impurity] = value + self._set(impurity=value) return self @since("1.4.0") @@ -604,7 +604,7 @@ def setNumTrees(self, value): """ Sets the value of :py:attr:`numTrees`. """ - self._paramMap[self.numTrees] = value + self._set(numTrees=value) return self @since("1.4.0") @@ -619,7 +619,7 @@ def setFeatureSubsetStrategy(self, value): """ Sets the value of :py:attr:`featureSubsetStrategy`. """ - self._paramMap[self.featureSubsetStrategy] = value + self._set(featureSubsetStrategy=value) return self @since("1.4.0") @@ -978,7 +978,7 @@ def setLossType(self, value): """ Sets the value of :py:attr:`lossType`. """ - self._paramMap[self.lossType] = value + self._set(lossType=value) return self @since("1.4.0") @@ -1113,7 +1113,7 @@ def setCensorCol(self, value): """ Sets the value of :py:attr:`censorCol`. """ - self._paramMap[self.censorCol] = value + self._set(censorCol=value) return self @since("1.6.0") @@ -1128,7 +1128,7 @@ def setQuantileProbabilities(self, value): """ Sets the value of :py:attr:`quantileProbabilities`. """ - self._paramMap[self.quantileProbabilities] = value + self._set(quantileProbabilities=value) return self @since("1.6.0") @@ -1143,7 +1143,7 @@ def setQuantilesCol(self, value): """ Sets the value of :py:attr:`quantilesCol`. """ - self._paramMap[self.quantilesCol] = value + self._set(quantilesCol=value) return self @since("1.6.0") diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 456d79d897e0..5ac539eddea5 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -228,7 +228,7 @@ def setNumFolds(self, value): """ Sets the value of :py:attr:`numFolds`. """ - self._paramMap[self.numFolds] = value + self._set(numFolds=value) return self @since("1.4.0") @@ -479,7 +479,7 @@ def setTrainRatio(self, value): """ Sets the value of :py:attr:`trainRatio`. """ - self._paramMap[self.trainRatio] = value + self._set(trainRatio=value) return self @since("2.0.0") From 436745f120bea9b3d4d37f2505e6b6df950e7e10 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 24 Mar 2016 10:31:58 -0700 Subject: [PATCH 2/7] cleaning up --- python/pyspark/ml/feature.py | 2 +- python/pyspark/ml/param/__init__.py | 4 ++-- python/pyspark/ml/param/_shared_params_code_gen.py | 2 +- python/pyspark/ml/param/shared.py | 2 +- python/pyspark/ml/pipeline.py | 2 +- python/pyspark/ml/wrapper.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 0bdb78e7a73c..653b2a025ebb 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1554,7 +1554,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, The indices are in [0, numLabels), ordered by label frequencies. So the most frequent label gets index 0. - >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid='error') >>> model = stringIndexer.fit(stringIndDf) >>> td = model.transform(stringIndDf) >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]), diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 9441ceb3447f..75138fc44efb 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -389,7 +389,7 @@ def copy(self, extra=None): if extra is None: extra = dict() that = copy.copy(self) - that._paramMap = copy.copy(self._paramMap) + that._paramMap = {} return self._copyValues(that, extra) def _shouldOwn(self, param): @@ -435,7 +435,7 @@ def _set(self, **kwargs): try: value = p.typeConverter(value) except TypeError as e: - raise TypeError('Invalid param value given for param "%s". %s' % (p.name, e)) + raise TypeError('Invalid param value %s given for param "%s". %s' % (value, p.name, e)) self._paramMap[p] = value return self diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index a7615c43bee2..a2acf956bc2a 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -131,7 +131,7 @@ def get$Name(self): "TypeConverters.toFloat"), ("handleInvalid", "how to handle invalid entries. Options are skip (which will filter " + "out rows with bad values), or error (which will throw an errror). More options may be " + - "added later.", None, "TypeConverters.toBoolean"), + "added later.", None, "TypeConverters.toString"), ("elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", "0.0", "TypeConverters.toFloat"), diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index c9e975525ce1..538c0b718ad9 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -392,7 +392,7 @@ class HasHandleInvalid(Params): Mixin for param handleInvalid: how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later. """ - handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later.", typeConverter=TypeConverters.toBoolean) + handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later.", typeConverter=TypeConverters.toString) def __init__(self): super(HasHandleInvalid, self).__init__() diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index 9d654e8b0f8d..6f599b51596f 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -90,7 +90,7 @@ def setStages(self, value): :param value: a list of transformers or estimators :return: the pipeline instance """ - self._paramMap[self.stages] = value + self._set(stages=value) return self @since("1.3.0") diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index cd0e5b80d555..055a2816f8d7 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -112,7 +112,7 @@ def _transfer_params_from_java(self): java_param = self._java_obj.getParam(param.name) if self._java_obj.isDefined(java_param): value = _java2py(sc, self._java_obj.getOrDefault(java_param)) - self._paramMap[param] = value + self._set(**{param.name: value}) def _transfer_param_map_from_java(self, javaParamMap): """ From ea022256a302d2ece8306eb925ed6a928efc6600 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 24 Mar 2016 10:40:25 -0700 Subject: [PATCH 3/7] style fix --- python/pyspark/ml/param/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 75138fc44efb..310c1f3e5f1a 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -435,7 +435,7 @@ def _set(self, **kwargs): try: value = p.typeConverter(value) except TypeError as e: - raise TypeError('Invalid param value %s given for param "%s". %s' % (value, p.name, e)) + raise TypeError('Invalid param value given for param "%s". %s' % (p.name, e)) self._paramMap[p] = value return self From 0c0fc63411d13a02b881c4218889a7c8a9bd1866 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 24 Mar 2016 11:36:41 -0700 Subject: [PATCH 4/7] _setDefault uses typeConverter --- python/pyspark/ml/feature.py | 2 +- python/pyspark/ml/param/__init__.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 653b2a025ebb..736ccbae1e35 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1721,7 +1721,7 @@ def __init__(self, inputCol=None, outputCol=None, stopWords=None, self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StopWordsRemover", self.uid) stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWords - defaultStopWords = stopWordsObj.English() + defaultStopWords = list(stopWordsObj.English()) self._setDefault(stopWords=defaultStopWords, caseSensitive=False) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 310c1f3e5f1a..c94fa2e6f20d 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -444,7 +444,14 @@ def _setDefault(self, **kwargs): Sets default params. """ for param, value in kwargs.items(): - self._defaultParamMap[getattr(self, param)] = value + p = getattr(self, param) + if value is not None: + try: + value = p.typeConverter(value) + except TypeError as e: + raise TypeError('Invalid default param value given for param "%s". %s' + % (p.name, e)) + self._defaultParamMap[p] = value return self def _copyValues(self, to, extra=None): From c8645970633a19e0a69cf46d07ee6634736c5aae Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 1 Apr 2016 11:36:52 -0700 Subject: [PATCH 5/7] set default ignores java objects --- python/pyspark/ml/feature.py | 9 +++++++-- python/pyspark/ml/param/__init__.py | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 736ccbae1e35..3260e48118db 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -19,6 +19,8 @@ if sys.version > '3': basestring = str +from py4j.java_collections import JavaArray + from pyspark import since from pyspark.rdd import ignore_unicode_prefix from pyspark.ml.param.shared import * @@ -1721,7 +1723,7 @@ def __init__(self, inputCol=None, outputCol=None, stopWords=None, self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StopWordsRemover", self.uid) stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWords - defaultStopWords = list(stopWordsObj.English()) + defaultStopWords = stopWordsObj.English() self._setDefault(stopWords=defaultStopWords, caseSensitive=False) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -1751,7 +1753,10 @@ def getStopWords(self): """ Get the stopwords. """ - return self.getOrDefault(self.stopWords) + stopwords = self.getOrDefault(self.stopWords) + if isinstance(stopwords, JavaArray): + stopwords = list(stopwords) + return stopwords @since("1.6.0") def setCaseSensitive(self, value): diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index c94fa2e6f20d..48d35db8c9c9 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -26,6 +26,8 @@ import numpy as np import warnings +from py4j.java_gateway import JavaObject + from pyspark import since from pyspark.ml.util import Identifiable from pyspark.mllib.linalg import DenseVector, Vector @@ -445,7 +447,7 @@ def _setDefault(self, **kwargs): """ for param, value in kwargs.items(): p = getattr(self, param) - if value is not None: + if value is not None and not isinstance(value, JavaObject): try: value = p.typeConverter(value) except TypeError as e: From 8079c1135d42d3eee85a5d996dd6e21ad0e01522 Mon Sep 17 00:00:00 2001 From: sethah Date: Wed, 13 Apr 2016 17:27:16 -0700 Subject: [PATCH 6/7] updating with master --- python/pyspark/ml/classification.py | 6 ++---- python/pyspark/ml/param/__init__.py | 7 +++++++ python/pyspark/ml/regression.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 59215e84c9bb..00c88e983dc8 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -143,8 +143,7 @@ def setThreshold(self, value): Clears value of :py:attr:`thresholds` if it has been set. """ self._set(threshold=value) - if self.isSet(self.thresholds): - del self._paramMap[self.thresholds] + self._clearParam(self.thresholds) return self @since("1.4.0") @@ -170,8 +169,7 @@ def setThresholds(self, value): Clears value of :py:attr:`threshold` if it has been set. """ self._set(thresholds=value) - if self.isSet(self.threshold): - del self._paramMap[self.threshold] + self._clearParam(self.threshold) return self @since("1.5.0") diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 48d35db8c9c9..ffc25de06e94 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -441,6 +441,13 @@ def _set(self, **kwargs): self._paramMap[p] = value return self + def _clearParam(self, param): + """ + Clears a param from the param map if it has been explicitly set. + """ + if self.isSet(param): + del self._paramMap[param] + def _setDefault(self, **kwargs): """ Sets default params. diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 582758c61bbf..0ebe3307c1d5 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1292,7 +1292,7 @@ def setFamily(self, value): """ Sets the value of :py:attr:`family`. """ - self._paramMap[self.family] = value + self._set(family=value) return self @since("2.0.0") @@ -1307,7 +1307,7 @@ def setLink(self, value): """ Sets the value of :py:attr:`link`. """ - self._paramMap[self.link] = value + self._set(link=value) return self @since("2.0.0") From 37b9ac5c07bccb74620314ddd133edbbdcacce53 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 15 Apr 2016 07:49:32 -0700 Subject: [PATCH 7/7] code review --- python/pyspark/ml/classification.py | 4 ++-- python/pyspark/ml/evaluation.py | 7 ++++--- python/pyspark/ml/feature.py | 5 +---- python/pyspark/ml/param/__init__.py | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 00c88e983dc8..1bc938d3bb4a 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -143,7 +143,7 @@ def setThreshold(self, value): Clears value of :py:attr:`thresholds` if it has been set. """ self._set(threshold=value) - self._clearParam(self.thresholds) + self._clear(self.thresholds) return self @since("1.4.0") @@ -169,7 +169,7 @@ def setThresholds(self, value): Clears value of :py:attr:`threshold` if it has been set. """ self._set(thresholds=value) - self._clearParam(self.threshold) + self._clear(self.threshold) return self @since("1.5.0") diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 129701c4e6c1..52a3fe898574 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -126,7 +126,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction metricName = Param(Params._dummy(), "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)", - TypeConverters.toString) + typeConverter=TypeConverters.toString) @keyword_only def __init__(self, rawPredictionCol="rawPrediction", labelCol="label", @@ -195,7 +195,8 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): # when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`), # we take and output the negative of this metric. metricName = Param(Params._dummy(), "metricName", - "metric name in evaluation (mse|rmse|r2|mae)", TypeConverters.toString) + "metric name in evaluation (mse|rmse|r2|mae)", + typeConverter=TypeConverters.toString) @keyword_only def __init__(self, predictionCol="prediction", labelCol="label", @@ -262,7 +263,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio metricName = Param(Params._dummy(), "metricName", "metric name in evaluation " "(f1|precision|recall|weightedPrecision|weightedRecall)", - TypeConverters.toString) + typeConverter=TypeConverters.toString) @keyword_only def __init__(self, predictionCol="prediction", labelCol="label", diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 3260e48118db..18d156fb9939 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1753,10 +1753,7 @@ def getStopWords(self): """ Get the stopwords. """ - stopwords = self.getOrDefault(self.stopWords) - if isinstance(stopwords, JavaArray): - stopwords = list(stopwords) - return stopwords + return self.getOrDefault(self.stopWords) @since("1.6.0") def setCaseSensitive(self, value): diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index ffc25de06e94..9f0b063aace5 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -441,7 +441,7 @@ def _set(self, **kwargs): self._paramMap[p] = value return self - def _clearParam(self, param): + def _clear(self, param): """ Clears a param from the param map if it has been explicitly set. """