
Commit 52ea399

yanboliang authored and mengxr committed
[SPARK-10355] [ML] [PySpark] Add Python API for SQLTransformer
Add Python API for SQLTransformer

Author: Yanbo Liang <[email protected]>

Closes #8527 from yanboliang/spark-10355.
1 parent fe16fd0 commit 52ea399

File tree

1 file changed: +54 -3 lines changed


python/pyspark/ml/feature.py

Lines changed: 54 additions & 3 deletions
@@ -28,9 +28,9 @@
 
 __all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel',
            'NGram', 'Normalizer', 'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer',
-           'StandardScaler', 'StandardScalerModel', 'StringIndexer', 'StringIndexerModel',
-           'Tokenizer', 'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel',
-           'PCA', 'PCAModel', 'RFormula', 'RFormulaModel']
+           'SQLTransformer', 'StandardScaler', 'StandardScalerModel', 'StringIndexer',
+           'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer', 'Word2Vec',
+           'Word2VecModel', 'PCA', 'PCAModel', 'RFormula', 'RFormulaModel']
 
 
 @inherit_doc
@@ -743,6 +743,57 @@ def getPattern(self):
         return self.getOrDefault(self.pattern)
 
 
+@inherit_doc
+class SQLTransformer(JavaTransformer):
+    """
+    Implements the transforms which are defined by SQL statement.
+    Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
+    where '__THIS__' represents the underlying table of the input dataset.
+
+    >>> df = sqlContext.createDataFrame([(0, 1.0, 3.0), (2, 2.0, 5.0)], ["id", "v1", "v2"])
+    >>> sqlTrans = SQLTransformer(
+    ...     statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
+    >>> sqlTrans.transform(df).head()
+    Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
+    """
+
+    # a placeholder to make it appear in the generated doc
+    statement = Param(Params._dummy(), "statement", "SQL statement")
+
+    @keyword_only
+    def __init__(self, statement=None):
+        """
+        __init__(self, statement=None)
+        """
+        super(SQLTransformer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid)
+        self.statement = Param(self, "statement", "SQL statement")
+        kwargs = self.__init__._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    def setParams(self, statement=None):
+        """
+        setParams(self, statement=None)
+        Sets params for this SQLTransformer.
+        """
+        kwargs = self.setParams._input_kwargs
+        return self._set(**kwargs)
+
+    def setStatement(self, value):
+        """
+        Sets the value of :py:attr:`statement`.
+        """
+        self._paramMap[self.statement] = value
+        return self
+
+    def getStatement(self):
+        """
+        Gets the value of statement or its default value.
+        """
+        return self.getOrDefault(self.statement)
+
+
 @inherit_doc
 class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
     """
