@@ -32,7 +32,9 @@
public class JavaDecisionTreeClassificationExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaDecisionTreeClassificationExample").getOrCreate();
+ .builder()
+ .appName("JavaDecisionTreeClassificationExample")
+ .getOrCreate();

// $example on$
// Load the data stored in LIBSVM format as a DataFrame.
@@ -52,10 +54,10 @@ public static void main(String[] args) {
VectorIndexerModel featureIndexer = new VectorIndexer()
.setInputCol("features")
.setOutputCol("indexedFeatures")
- .setMaxCategories(4) // features with > 4 distinct values are treated as continuous
+ .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
.fit(data);

- // Split the data into training and test sets (30% held out for testing)
+ // Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@@ -71,11 +73,11 @@ public static void main(String[] args) {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels());

- // Chain indexers and tree in a Pipeline
+ // Chain indexers and tree in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[]{labelIndexer, featureIndexer, dt, labelConverter});

- // Train model.  This also runs the indexers.
+ // Train model. This also runs the indexers.
PipelineModel model = pipeline.fit(trainingData);

// Make predictions.
@@ -84,7 +86,7 @@ public static void main(String[] args) {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5);

- // Select (prediction, true label) and compute test error
+ // Select (prediction, true label) and compute test error.
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")
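The hunk above is cut off before the evaluator is actually used. As a rough sketch of how this kind of example usually finishes (the metric name and printed message below are illustrative assumptions, not lines from this diff), the evaluator scores the indexed label against the prediction and reports 1 - accuracy as the test error:

    // Assumes `predictions` is the Dataset<Row> produced by model.transform(testData) above.
    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
        .setLabelCol("indexedLabel")
        .setPredictionCol("prediction")
        .setMetricName("accuracy");
    double accuracy = evaluator.evaluate(predictions);
    System.out.println("Test Error = " + (1.0 - accuracy));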
@@ -33,7 +33,9 @@
public class JavaDecisionTreeRegressionExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaDecisionTreeRegressionExample").getOrCreate();
+ .builder()
+ .appName("JavaDecisionTreeRegressionExample")
+ .getOrCreate();
// $example on$
// Load the data stored in LIBSVM format as a DataFrame.
Dataset<Row> data = spark.read().format("libsvm")
@@ -47,7 +49,7 @@ public static void main(String[] args) {
.setMaxCategories(4)
.fit(data);

- // Split the data into training and test sets (30% held out for testing)
+ // Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@@ -56,11 +58,11 @@ public static void main(String[] args) {
DecisionTreeRegressor dt = new DecisionTreeRegressor()
.setFeaturesCol("indexedFeatures");

- // Chain indexer and tree in a Pipeline
+ // Chain indexer and tree in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[]{featureIndexer, dt});

- // Train model.  This also runs the indexer.
+ // Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);

// Make predictions.
@@ -69,7 +71,7 @@ public static void main(String[] args) {
// Select example rows to display.
predictions.select("label", "features").show(5);

- // Select (prediction, true label) and compute test error
+ // Select (prediction, true label) and compute test error.
RegressionEvaluator evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")
@@ -62,15 +62,15 @@ public static void main(String[] args) throws Exception {
new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)));
Dataset<Row> training = spark.createDataFrame(localTraining, LabeledPoint.class);

- // Create a LogisticRegression instance.  This instance is an Estimator.
+ // Create a LogisticRegression instance. This instance is an Estimator.
MyJavaLogisticRegression lr = new MyJavaLogisticRegression();
// Print out the parameters, documentation, and any default values.
System.out.println("MyJavaLogisticRegression parameters:\n" + lr.explainParams() + "\n");

// We may set parameters using setter methods.
lr.setMaxIter(10);

- // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+ // Learn a LogisticRegression model. This uses the parameters stored in lr.
MyJavaLogisticRegressionModel model = lr.fit(training);

// Prepare test data.
@@ -214,7 +214,7 @@ public Vector predictRaw(Vector features) {
}

/**
- * Number of classes the label can take.  2 indicates binary classification.
+ * Number of classes the label can take. 2 indicates binary classification.
*/
public int numClasses() { return 2; }

@@ -38,7 +38,9 @@
public class JavaEstimatorTransformerParamExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaEstimatorTransformerParamExample").getOrCreate();
+ .builder()
+ .appName("JavaEstimatorTransformerParamExample")
+ .getOrCreate();

// $example on$
// Prepare training data.
@@ -75,11 +75,11 @@ public static void main(String[] args) {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels());

- // Chain indexers and GBT in a Pipeline
+ // Chain indexers and GBT in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter});

- // Train model.  This also runs the indexers.
+ // Train model. This also runs the indexers.
PipelineModel model = pipeline.fit(trainingData);

// Make predictions.
@@ -88,7 +88,7 @@ public static void main(String[] args) {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5);

- // Select (prediction, true label) and compute test error
+ // Select (prediction, true label) and compute test error.
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")
@@ -34,7 +34,9 @@
public class JavaGradientBoostedTreeRegressorExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaGradientBoostedTreeRegressorExample").getOrCreate();
+ .builder()
+ .appName("JavaGradientBoostedTreeRegressorExample")
+ .getOrCreate();

// $example on$
// Load and parse the data file, converting it to a DataFrame.
@@ -48,7 +50,7 @@ public static void main(String[] args) {
.setMaxCategories(4)
.fit(data);

- // Split the data into training and test sets (30% held out for testing)
+ // Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@@ -59,10 +61,10 @@ public static void main(String[] args) {
.setFeaturesCol("indexedFeatures")
.setMaxIter(10);

- // Chain indexer and GBT in a Pipeline
+ // Chain indexer and GBT in a Pipeline.
Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] {featureIndexer, gbt});

- // Train model.  This also runs the indexer.
+ // Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);

// Make predictions.
@@ -71,7 +73,7 @@ public static void main(String[] args) {
// Select example rows to display.
predictions.select("prediction", "label", "features").show(5);

- // Select (prediction, true label) and compute test error
+ // Select (prediction, true label) and compute test error.
RegressionEvaluator evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")
@@ -30,10 +30,12 @@
public class JavaLinearRegressionWithElasticNetExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaLinearRegressionWithElasticNetExample").getOrCreate();
+ .builder()
+ .appName("JavaLinearRegressionWithElasticNetExample")
+ .getOrCreate();

// $example on$
- // Load training data
+ // Load training data.
Dataset<Row> training = spark.read().format("libsvm")
.load("data/mllib/sample_linear_regression_data.txt");

Expand All @@ -42,14 +44,14 @@ public static void main(String[] args) {
.setRegParam(0.3)
.setElasticNetParam(0.8);

- // Fit the model
+ // Fit the model.
LinearRegressionModel lrModel = lr.fit(training);

- // Print the coefficients and intercept for linear regression
+ // Print the coefficients and intercept for linear regression.
System.out.println("Coefficients: "
+ lrModel.coefficients() + " Intercept: " + lrModel.intercept());

- // Summarize the model over the training set and print out some metrics
+ // Summarize the model over the training set and print out some metrics.
LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
System.out.println("numIterations: " + trainingSummary.totalIterations());
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
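The hunk above stops after the first two training-summary metrics. A minimal sketch of how the summary inspection typically continues, assuming the trainingSummary variable from the hunk (the specific metrics printed here are an illustrative choice):

    // LinearRegressionTrainingSummary also exposes residuals and goodness-of-fit metrics.
    trainingSummary.residuals().show();
    System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
    System.out.println("r2: " + trainingSummary.r2());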
@@ -31,7 +31,9 @@
public class JavaLogisticRegressionSummaryExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaLogisticRegressionSummaryExample").getOrCreate();
+ .builder()
+ .appName("JavaLogisticRegressionSummaryExample")
+ .getOrCreate();

// Load training data
Dataset<Row> training = spark.read().format("libsvm")
@@ -28,7 +28,9 @@
public class JavaLogisticRegressionWithElasticNetExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaLogisticRegressionWithElasticNetExample").getOrCreate();
+ .builder()
+ .appName("JavaLogisticRegressionWithElasticNetExample")
+ .getOrCreate();

// $example on$
// Load training data
@@ -43,7 +43,9 @@
public class JavaModelSelectionViaCrossValidationExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaModelSelectionViaCrossValidationExample").getOrCreate();
+ .builder()
+ .appName("JavaModelSelectionViaCrossValidationExample")
+ .getOrCreate();

// $example on$
// Prepare training documents, which are labeled.
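Only the SparkSession builder lines of JavaModelSelectionViaCrossValidationExample appear in this diff. For orientation, here is a minimal sketch of the cross-validation setup such an example is built around; the pipeline, hashingTF, lr, and training names are assumptions for illustration, not identifiers taken from the diff:

    // Classes come from org.apache.spark.ml.tuning and org.apache.spark.ml.evaluation.
    // Build a grid of parameter combinations to search over.
    ParamMap[] paramGrid = new ParamGridBuilder()
        .addGrid(hashingTF.numFeatures(), new int[]{10, 100, 1000})
        .addGrid(lr.regParam(), new double[]{0.1, 0.01})
        .build();

    // CrossValidator selects the best combination via k-fold cross-validation.
    CrossValidator cv = new CrossValidator()
        .setEstimator(pipeline)
        .setEvaluator(new BinaryClassificationEvaluator())
        .setEstimatorParamMaps(paramGrid)
        .setNumFolds(2);
    CrossValidatorModel cvModel = cv.fit(training);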
@@ -43,7 +43,9 @@
public class JavaModelSelectionViaTrainValidationSplitExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaModelSelectionViaTrainValidationSplitExample").getOrCreate();
+ .builder()
+ .appName("JavaModelSelectionViaTrainValidationSplitExample")
+ .getOrCreate();

// $example on$
Dataset<Row> data = spark.read().format("libsvm")
@@ -33,7 +33,9 @@ public class JavaMultilayerPerceptronClassifierExample {

public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaMultilayerPerceptronClassifierExample").getOrCreate();
+ .builder()
+ .appName("JavaMultilayerPerceptronClassifierExample")
+ .getOrCreate();

// $example on$
// Load training data
@@ -35,7 +35,9 @@
public class JavaQuantileDiscretizerExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaQuantileDiscretizerExample").getOrCreate();
+ .builder()
+ .appName("JavaQuantileDiscretizerExample")
+ .getOrCreate();

// $example on$
List<Row> data = Arrays.asList(
@@ -33,7 +33,9 @@
public class JavaRandomForestClassifierExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaRandomForestClassifierExample").getOrCreate();
+ .builder()
+ .appName("JavaRandomForestClassifierExample")
+ .getOrCreate();

// $example on$
// Load and parse the data file, converting it to a DataFrame.
@@ -34,7 +34,9 @@
public class JavaRandomForestRegressorExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaRandomForestRegressorExample").getOrCreate();
+ .builder()
+ .appName("JavaRandomForestRegressorExample")
+ .getOrCreate();

// $example on$
// Load and parse the data file, converting it to a DataFrame.
@@ -62,7 +64,7 @@ public static void main(String[] args) {
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[] {featureIndexer, rf});

- // Train model.  This also runs the indexer.
+ // Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);

// Make predictions.
@@ -46,7 +46,7 @@ public static void main(String[] args) {
.getOrCreate();

// Prepare training data.
- // We use LabeledPoint, which is a JavaBean.  Spark SQL can convert RDDs of JavaBeans
+ // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
// into DataFrames, where it uses the bean metadata to infer the schema.
List<LabeledPoint> localTraining = Lists.newArrayList(
new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
@@ -56,7 +56,7 @@ public static void main(String[] args) {
Dataset<Row> training =
spark.createDataFrame(localTraining, LabeledPoint.class);

- // Create a LogisticRegression instance.  This instance is an Estimator.
+ // Create a LogisticRegression instance. This instance is an Estimator.
LogisticRegression lr = new LogisticRegression();
// Print out the parameters, documentation, and any default values.
System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");
@@ -65,7 +65,7 @@ public static void main(String[] args) {
lr.setMaxIter(10)
.setRegParam(0.01);

- // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+ // Learn a LogisticRegression model. This uses the parameters stored in lr.
LogisticRegressionModel model1 = lr.fit(training);
// Since model1 is a Model (i.e., a Transformer produced by an Estimator),
// we can view the parameters it used during fit().
@@ -82,7 +82,7 @@ public static void main(String[] args) {

// One can also combine ParamMaps.
ParamMap paramMap2 = new ParamMap();
- paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name
+ paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name.
ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);

// Now learn a new model using the paramMapCombined parameters.
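The hunk above ends just before the combined ParamMap is used. As a brief illustrative continuation (reusing the lr, training, and paramMapCombined variables shown above), fit() accepts a ParamMap whose values override the parameters set directly on the estimator:

    // Parameters supplied at fit() time take precedence over those set via setters.
    LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
    System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());

Because paramMap2 renamed the probability output column to "myProbability", downstream code would select that column instead of the default "probability".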
@@ -43,7 +43,9 @@ public class JavaSimpleTextClassificationPipeline {

public static void main(String[] args) {
SparkSession spark = SparkSession
- .builder().appName("JavaSimpleTextClassificationPipeline").getOrCreate();
+ .builder()
+ .appName("JavaSimpleTextClassificationPipeline")
+ .getOrCreate();

// Prepare training documents, which are labeled.
List<LabeledDocument> localTraining = Lists.newArrayList(