
Commit ade614b

Make a Python example work with SparkSession
1 parent f116a84 commit ade614b

File tree

1 file changed: +11 −10 lines


examples/src/main/python/ml/simple_params_example.py

Lines changed: 11 additions & 10 deletions
@@ -20,11 +20,10 @@
 import pprint
 import sys
 
-from pyspark import SparkContext
 from pyspark.ml.classification import LogisticRegression
 from pyspark.mllib.linalg import DenseVector
 from pyspark.mllib.regression import LabeledPoint
-from pyspark.sql import SQLContext
+from pyspark.sql import Row, SparkSession
 
 """
 A simple example demonstrating ways to specify parameters for Estimators and Transformers.
@@ -36,18 +35,20 @@
     if len(sys.argv) > 1:
         print("Usage: simple_params_example", file=sys.stderr)
         exit(1)
-    sc = SparkContext(appName="PythonSimpleParamsExample")
-    sqlContext = SQLContext(sc)
+    spark = SparkSession \
+        .builder \
+        .appName("SimpleTextClassificationPipeline") \
+        .getOrCreate()
 
     # prepare training data.
     # We create an RDD of LabeledPoints and convert them into a DataFrame.
     # A LabeledPoint is an Object with two fields named label and features
     # and Spark SQL identifies these fields and creates the schema appropriately.
-    training = sc.parallelize([
+    training = spark.createDataFrame([
         LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])),
         LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])),
         LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])),
-        LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))]).toDF()
+        LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))])
 
     # Create a LogisticRegression instance with maxIter = 10.
     # This instance is an Estimator.
@@ -70,18 +71,18 @@
 
     # We may alternatively specify parameters using a parameter map.
     # paramMap overrides all lr parameters set earlier.
-    paramMap = {lr.maxIter: 20, lr.thresholds: [0.45, 0.55], lr.probabilityCol: "myProbability"}
+    paramMap = {lr.maxIter: 20, lr.thresholds: [0.5, 0.5], lr.probabilityCol: "myProbability"}
 
     # Now learn a new model using the new parameters.
     model2 = lr.fit(training, paramMap)
     print("Model 2 was fit using parameters:\n")
     pprint.pprint(model2.extractParamMap())
 
     # prepare test data.
-    test = sc.parallelize([
+    test = spark.createDataFrame([
         LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])),
         LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])),
-        LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))]).toDF()
+        LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))])
 
     # Make predictions on test data using the Transformer.transform() method.
     # LogisticRegressionModel.transform will only use the 'features' column.
@@ -95,4 +96,4 @@
         print("features=%s,label=%s -> prob=%s, prediction=%s"
               % (row.features, row.label, row.myProbability, row.prediction))
 
-    sc.stop()
+    spark.stop()
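
For reference, a minimal self-contained sketch of the SparkSession-based pattern this commit adopts: build one session instead of a SparkContext/SQLContext pair, create DataFrames from local data with createDataFrame, and pass a parameter map to fit() to override estimator settings. This sketch assumes a later PySpark (2.x or newer) where the ml pipeline uses pyspark.ml.linalg vectors; the app name and column names are illustrative, not taken from the example file.

    from pyspark.ml.classification import LogisticRegression
    from pyspark.ml.linalg import Vectors
    from pyspark.sql import SparkSession

    if __name__ == "__main__":
        # SparkSession replaces the separate SparkContext + SQLContext pair.
        spark = SparkSession.builder.appName("SparkSessionSketch").getOrCreate()

        # createDataFrame accepts a local list directly, so no
        # sc.parallelize(...).toDF() round trip is needed.
        training = spark.createDataFrame(
            [(1.0, Vectors.dense([0.0, 1.1, 0.1])),
             (0.0, Vectors.dense([2.0, 1.0, -1.0])),
             (0.0, Vectors.dense([2.0, 1.3, 1.0])),
             (1.0, Vectors.dense([0.0, 1.2, -0.5]))],
            ["label", "features"])

        lr = LogisticRegression(maxIter=10)

        # A parameter map passed to fit() overrides parameters set on the estimator,
        # which is what the paramMap in the example above relies on.
        model = lr.fit(training, {lr.maxIter: 20, lr.probabilityCol: "myProbability"})

        model.transform(training).select("label", "myProbability", "prediction").show()

        spark.stop()
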
