|
21 | 21 | import sys |
22 | 22 |
|
23 | 23 | from pyspark.ml.classification import LogisticRegression |
24 | | -from pyspark.mllib.linalg import DenseVector |
25 | | -from pyspark.mllib.regression import LabeledPoint |
26 | | -from pyspark.sql import SparkSession |
| 24 | +from pyspark.ml.linalg import DenseVector |
| 25 | +from pyspark.sql import Row, SparkSession |
27 | 26 |
|
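For context on the import change: the two `DenseVector` classes are distinct types, and the DataFrame-based estimators in `pyspark.ml` accept only the `pyspark.ml.linalg` variant in a features column. A minimal sketch of the difference (the aliases are ours; the class paths are real):

```python
from pyspark.ml.linalg import DenseVector as MLDenseVector
from pyspark.mllib.linalg import DenseVector as MLlibDenseVector

ml_vec = MLDenseVector([0.0, 1.1, 0.1])
mllib_vec = MLlibDenseVector([0.0, 1.1, 0.1])

# Same values, different classes: pyspark.ml estimators reject the
# mllib variant when it appears in a DataFrame's features column.
print(type(ml_vec))     # pyspark.ml.linalg.DenseVector
print(type(mllib_vec))  # pyspark.mllib.linalg.DenseVector
print((ml_vec.toArray() == mllib_vec.toArray()).all())  # True
```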
28 | 27 | """ |
29 | 28 | A simple example demonstrating ways to specify parameters for Estimators and Transformers. |
|
42 | | -# A LabeledPoint is an Object with two fields named label and features |
43 | | -# and Spark SQL identifies these fields and creates the schema appropriately. |
| 41 | +# A Row is an object with two fields named label and features, |
| 42 | +# and Spark SQL identifies these fields and creates the schema appropriately. |
44 | 43 | training = spark.createDataFrame([ |
45 | | - LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])), |
46 | | - LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])), |
47 | | - LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])), |
48 | | - LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))]) |
| 44 | + Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])), |
| 45 | + Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])), |
| 46 | + Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])), |
| 47 | + Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))]) |
49 | 48 |
|
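As the comment above notes, `createDataFrame` infers the schema from the Row field names; a standalone sketch (the app name is arbitrary):

```python
from pyspark.ml.linalg import DenseVector
from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.appName("SimpleParamsExample").getOrCreate()

training = spark.createDataFrame([
    Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])),
    Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0]))])

# Prints a 'label' column typed double and a 'features' column typed
# vector, both inferred from the Row fields (column order can vary by
# Spark version, since Row used to sort fields by name).
training.printSchema()
```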
50 | 49 | # Create a LogisticRegression instance with maxIter = 10. |
51 | 50 | # This instance is an Estimator. |
|
77 | 76 |
|
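Per the file's docstring, the point of the example is the ways of supplying parameters; a sketch against the standard Estimator API, reusing the `training` DataFrame from above (the override values here are illustrative):

```python
from pyspark.ml.classification import LogisticRegression

# The Estimator, configured at construction time.
lr = LogisticRegression(maxIter=10, regParam=0.01)

# fit() turns the Estimator into a Model, which is a Transformer.
model1 = lr.fit(training)

# A param dict passed to fit() overrides the instance's settings for
# that call only; lr itself still carries maxIter=10.
model2 = lr.fit(training, {lr.maxIter: 5, lr.regParam: 0.1})

print(lr.explainParams())  # lists every Param with its current value
```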
78 | 77 | # prepare test data. |
79 | 78 | test = spark.createDataFrame([ |
80 | | - LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])), |
81 | | - LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])), |
82 | | - LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))]) |
| 79 | + Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])), |
| 80 | + Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])), |
| 81 | + Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))]) |
83 | 82 |
|
84 | 83 | # Make predictions on test data using the Transformer.transform() method. |
85 | 84 | # LogisticRegressionModel.transform will only use the 'features' column. |
|
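And the prediction step the last comment describes, reusing `model2` and `test` from above; `probability` and `prediction` are the model's standard output columns:

```python
# transform() only reads the 'features' column; 'label' is carried
# through unchanged, which makes eyeballing the results easy.
prediction = model2.transform(test)
for row in prediction.select("features", "label", "probability", "prediction").collect():
    print(row)
```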