From 23e2ed7895e4134427fcef8a285e7bbcb91eed52 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 1 Aug 2017 10:17:02 -0700 Subject: [PATCH 1/4] init pr --- dev/deps/spark-deps-hadoop-2.6 | 4 ++-- dev/deps/spark-deps-hadoop-2.7 | 4 ++-- pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 76e37646af0ec..9a0f414f2c1db 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -22,8 +22,8 @@ avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.13.1.jar -breeze_2.11-0.13.1.jar +breeze-macros_2.11-0.13.2.jar +breeze_2.11-0.13.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index f4173477d05d3..e5b1c2333da13 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -22,8 +22,8 @@ avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze-macros_2.11-0.13.1.jar -breeze_2.11-0.13.1.jar +breeze-macros_2.11-0.13.2.jar +breeze_2.11-0.13.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar diff --git a/pom.xml b/pom.xml index d54a9c4f19667..a56e45cdc8d99 100644 --- a/pom.xml +++ b/pom.xml @@ -678,7 +678,7 @@ org.scalanlp breeze_${scala.binary.version} - 0.13.1 + 0.13.2 From 425fd4101f4c40af790f7687af6420635fea9784 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 4 Aug 2017 10:35:04 -0700 Subject: [PATCH 2/4] fix testcase --- .../test/scala/org/apache/spark/ml/util/MLTestingUtils.scala | 1 - .../org/apache/spark/mllib/optimization/LBFGSSuite.scala | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala 
b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala index bef79e634f75f..aef81c8c173a0 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala @@ -160,7 +160,6 @@ object MLTestingUtils extends SparkFunSuite { featuresColName: String = "features", censorColName: String = "censor"): Map[NumericType, DataFrame] = { val df = spark.createDataFrame(Seq( - (0, Vectors.dense(0)), (1, Vectors.dense(1)), (2, Vectors.dense(2)), (3, Vectors.dense(3)), diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 3d6a9f8d84cac..69c303ee932e0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -191,8 +191,8 @@ class LBFGSSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers // With smaller convergenceTol, it takes more steps. assert(lossLBFGS3.length > lossLBFGS2.length) - // Based on observation, lossLBFGS3 runs 7 iterations, no theoretically guaranteed. - assert(lossLBFGS3.length == 7) + // Based on observation, lossLBFGS3 runs 6 iterations, not theoretically guaranteed. 
+ assert(lossLBFGS3.length == 6) assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol) } From c9ae023864f277c0f68f99bd94c53eb80cd27db1 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 4 Aug 2017 10:45:49 -0700 Subject: [PATCH 3/4] fix aft testcase --- .../apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala index fb39e50a83552..02e5c6d294f44 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala @@ -364,7 +364,6 @@ class AFTSurvivalRegressionSuite test("should support all NumericType censors, and not support other types") { val df = spark.createDataFrame(Seq( - (0, Vectors.dense(0)), (1, Vectors.dense(1)), (2, Vectors.dense(2)), (3, Vectors.dense(3)), From 5063758c8b1903000e3718d8085b6ef1af3b37f3 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 7 Aug 2017 16:15:56 -0700 Subject: [PATCH 4/4] update aft py testcase --- .../ml/regression/AFTSurvivalRegression.scala | 2 ++ python/pyspark/ml/regression.py | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 094853b6f4802..0891994530f88 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -553,6 +553,8 @@ private class AFTAggregator( val ti = data.label val delta = data.censor + require(ti > 0.0, "The lifetime or label should be greater than 0.") + val localFeaturesStd = bcFeaturesStd.value val margin = { diff --git 
a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 72374acbe019f..9d5b768091cf4 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1123,7 +1123,7 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([ ... (1.0, Vectors.dense(1.0), 1.0), - ... (0.0, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"]) + ... (1e-40, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"]) >>> aftsr = AFTSurvivalRegression() >>> model = aftsr.fit(df) >>> model.predict(Vectors.dense(6.3)) @@ -1131,12 +1131,12 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> model.predictQuantiles(Vectors.dense(6.3)) DenseVector([0.0101, 0.0513, 0.1054, 0.2877, 0.6931, 1.3863, 2.3026, 2.9957, 4.6052]) >>> model.transform(df).show() - +-----+---------+------+----------+ - |label| features|censor|prediction| - +-----+---------+------+----------+ - | 1.0| [1.0]| 1.0| 1.0| - | 0.0|(1,[],[])| 0.0| 1.0| - +-----+---------+------+----------+ + +-------+---------+------+----------+ + | label| features|censor|prediction| + +-------+---------+------+----------+ + | 1.0| [1.0]| 1.0| 1.0| + |1.0E-40|(1,[],[])| 0.0| 1.0| + +-------+---------+------+----------+ ... >>> aftsr_path = temp_path + "/aftsr" >>> aftsr.save(aftsr_path)