|
28 | 28 |
|
# Names exported by this module; each entry is a feature transformer,
# estimator or fitted model class defined in this file.
__all__ = [
    'Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF',
    'IDF', 'IDFModel', 'NGram', 'Normalizer', 'OneHotEncoder',
    'PolynomialExpansion', 'RegexTokenizer', 'SQLTransformer',
    'StandardScaler', 'StandardScalerModel', 'StringIndexer',
    'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer',
    'Word2Vec', 'Word2VecModel', 'PCA', 'PCAModel', 'RFormula',
    'RFormulaModel',
]
34 | 34 |
|
35 | 35 |
|
36 | 36 | @inherit_doc |
@@ -743,6 +743,57 @@ def getPattern(self): |
743 | 743 | return self.getOrDefault(self.pattern) |
744 | 744 |
|
745 | 745 |
|
@inherit_doc
class SQLTransformer(JavaTransformer):
    """
    Implements the transformations that are defined by a SQL statement.
    Currently only SQL syntax like 'SELECT ... FROM __THIS__' is supported,
    where '__THIS__' represents the underlying table of the input dataset.

    >>> df = sqlContext.createDataFrame([(0, 1.0, 3.0), (2, 2.0, 5.0)], ["id", "v1", "v2"])
    >>> sqlTrans = SQLTransformer(
    ...     statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
    >>> sqlTrans.transform(df).head()
    Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
    """

    # a placeholder to make it appear in the generated doc
    statement = Param(Params._dummy(), "statement", "SQL statement")

    @keyword_only
    def __init__(self, statement=None):
        """
        __init__(self, statement=None)
        """
        super(SQLTransformer, self).__init__()
        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid)
        # Shadow the class-level placeholder with a param owned by this instance.
        self.statement = Param(self, "statement", "SQL statement")
        # Forward the keyword arguments recorded on the decorated method.
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, statement=None):
        """
        setParams(self, statement=None)
        Sets params for this SQLTransformer.
        """
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def setStatement(self, value):
        """
        Sets the value of :py:attr:`statement`.
        """
        # Go through _set, like setParams does, instead of writing to the
        # param map directly, so every assignment takes the same path.
        return self._set(statement=value)

    def getStatement(self):
        """
        Gets the value of statement or its default value.
        """
        return self.getOrDefault(self.statement)
| 795 | + |
| 796 | + |
746 | 797 | @inherit_doc |
747 | 798 | class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): |
748 | 799 | """ |
|
0 commit comments