Skip to content

Commit 798825c

Browse files
HyukjinKwon authored and jkbradley committed
[SPARK-14615][ML][FOLLOWUP] Fix Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms
## What changes were proposed in this pull request?

This PR fixes the Python examples so that they use the new ML Vector and Matrix APIs in the ML pipeline based algorithms.

I first ran the shell command `grep -r "from pyspark.mllib" .` and then executed every example it listed. Some of the tests in `ml` produced error messages like the one below:

```
pyspark.sql.utils.IllegalArgumentException: u'requirement failed: Input type must be VectorUDT but got org.apache.spark.mllib.linalg.VectorUDTf71b0bce.'
```

So I fixed them to use the new APIs, in the same way as the Python tests that were fixed in #12627.

## How was this patch tested?

Manually tested all of the examples listed by `grep -r "from pyspark.mllib" .`.

Author: hyukjinkwon <[email protected]>

Closes #13393 from HyukjinKwon/SPARK-14615.

(cherry picked from commit 99f3c82)
Signed-off-by: Joseph K. Bradley <[email protected]>
1 parent 7d6bd11 commit 798825c

10 files changed

+18
-19
lines changed

examples/src/main/python/ml/aft_survival_regression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.regression import AFTSurvivalRegression
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
# $example off$
2424
from pyspark.sql import SparkSession
2525

examples/src/main/python/ml/chisq_selector_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from pyspark.sql import SparkSession
2121
# $example on$
2222
from pyspark.ml.feature import ChiSqSelector
23-
from pyspark.mllib.linalg import Vectors
23+
from pyspark.ml.linalg import Vectors
2424
# $example off$
2525

2626
if __name__ == "__main__":

examples/src/main/python/ml/dct_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.feature import DCT
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
# $example off$
2424
from pyspark.sql import SparkSession
2525

examples/src/main/python/ml/elementwise_product_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.feature import ElementwiseProduct
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
# $example off$
2424
from pyspark.sql import SparkSession
2525

examples/src/main/python/ml/estimator_transformer_param_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"""
2121

2222
# $example on$
23-
from pyspark.mllib.linalg import Vectors
23+
from pyspark.ml.linalg import Vectors
2424
from pyspark.ml.classification import LogisticRegression
2525
# $example off$
2626
from pyspark.sql import SparkSession

examples/src/main/python/ml/pca_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.feature import PCA
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
# $example off$
2424
from pyspark.sql import SparkSession
2525

examples/src/main/python/ml/polynomial_expansion_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.feature import PolynomialExpansion
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
# $example off$
2424
from pyspark.sql import SparkSession
2525

examples/src/main/python/ml/simple_params_example.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,8 @@
2121
import sys
2222

2323
from pyspark.ml.classification import LogisticRegression
24-
from pyspark.mllib.linalg import DenseVector
25-
from pyspark.mllib.regression import LabeledPoint
26-
from pyspark.sql import SparkSession
24+
from pyspark.ml.linalg import DenseVector
25+
from pyspark.sql import Row, SparkSession
2726

2827
"""
2928
A simple example demonstrating ways to specify parameters for Estimators and Transformers.
@@ -42,10 +41,10 @@
4241
# A LabeledPoint is an Object with two fields named label and features
4342
# and Spark SQL identifies these fields and creates the schema appropriately.
4443
training = spark.createDataFrame([
45-
LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])),
46-
LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])),
47-
LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])),
48-
LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))])
44+
Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])),
45+
Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])),
46+
Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])),
47+
Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))])
4948

5049
# Create a LogisticRegression instance with maxIter = 10.
5150
# This instance is an Estimator.
@@ -77,9 +76,9 @@
7776

7877
# prepare test data.
7978
test = spark.createDataFrame([
80-
LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])),
81-
LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])),
82-
LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))])
79+
Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])),
80+
Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])),
81+
Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))])
8382

8483
# Make predictions on test data using the Transformer.transform() method.
8584
# LogisticRegressionModel.transform will only use the 'features' column.

examples/src/main/python/ml/vector_assembler_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from __future__ import print_function
1919

2020
# $example on$
21-
from pyspark.mllib.linalg import Vectors
21+
from pyspark.ml.linalg import Vectors
2222
from pyspark.ml.feature import VectorAssembler
2323
# $example off$
2424
from pyspark.sql import SparkSession

examples/src/main/python/ml/vector_slicer_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
# $example on$
2121
from pyspark.ml.feature import VectorSlicer
22-
from pyspark.mllib.linalg import Vectors
22+
from pyspark.ml.linalg import Vectors
2323
from pyspark.sql.types import Row
2424
# $example off$
2525
from pyspark.sql import SparkSession

0 commit comments

Comments
 (0)