@@ -34,7 +34,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
3434 private val seed : Int = 42
3535 @ transient var dataset : DataFrame = _
3636 @ transient var datasetWithoutIntercept : DataFrame = _
37- @ transient var datasetWithBigFeature : DataFrame = _
37+ @ transient var datasetWithManyFeature : DataFrame = _
3838
3939 /*
4040 In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
@@ -52,22 +52,27 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
5252 super .beforeAll()
5353 dataset = sqlContext.createDataFrame(
5454 sc.parallelize(LinearDataGenerator .generateLinearInput(
55- 6.3 , Array (4.7 , 7.2 ), Array (0.9 , - 1.3 ), Array (0.7 , 1.2 ), 10000 , seed, 0.1 ), 2 ))
55+ intercept = 6.3 , weights = Array (4.7 , 7.2 ), xMean = Array (0.9 , - 1.3 ),
56+ xVariance = Array (0.7 , 1.2 ), nPoints = 10000 , seed = seed, eps = 0.1 ), 2 ))
5657 /*
5758 datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
5859 training model without intercept
5960 */
6061 datasetWithoutIntercept = sqlContext.createDataFrame(
6162 sc.parallelize(LinearDataGenerator .generateLinearInput(
62- 0.0 , Array (4.7 , 7.2 ), Array (0.9 , - 1.3 ), Array (0.7 , 1.2 ), 10000 , seed, 0.1 ), 2 ))
63+ intercept = 0.0 , weights = Array (4.7 , 7.2 ), xMean = Array (0.9 , - 1.3 ),
64+ xVariance = Array (0.7 , 1.2 ), nPoints = 10000 , seed = seed, eps = 0.1 ), 2 ))
6365
6466 val r = new Random (seed)
67+ // When feature size is larger than 4096, the l-bfgs optimizer (rather than the "normal"
68+ // solver) is chosen as the solver of linear regression in the case of "auto" mode.
6569 val featureSize = 4100
66- datasetWithBigFeature = sqlContext.createDataFrame(
67- sc.parallelize(LinearDataGenerator .generateLinearInput(
68- 0.0 , Seq .fill(featureSize)(r.nextDouble).toArray,
69- Seq .fill(featureSize)(r.nextDouble).toArray,
70- Seq .fill(featureSize)(r.nextDouble).toArray, 200 , seed, 0.1
70+ datasetWithManyFeature = sqlContext.createDataFrame(
71+ sc.parallelize(LinearDataGenerator .generateLinearSparseInput(
72+ intercept = 0.0 , weights = Seq .fill(featureSize)(r.nextDouble).toArray,
73+ xMean = Seq .fill(featureSize)(r.nextDouble).toArray,
74+ xVariance = Seq .fill(featureSize)(r.nextDouble).toArray, nPoints = 200 ,
75+ seed = seed, eps = 0.1
7176 ), 2 ))
7277 }
7378
@@ -696,7 +701,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
696701
697702 test(" linear regression model with l-bfgs with big feature datasets" ) {
698703 val trainer = new LinearRegression ().setSolver(" auto" )
699- val model = trainer.fit(datasetWithBigFeature )
704+ val model = trainer.fit(datasetWithManyFeature )
700705
701706 // Training results for the model should be available
702707 assert(model.hasSummary)
0 commit comments