Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-2.6
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.11-0.13.1.jar
breeze_2.11-0.13.1.jar
breeze-macros_2.11-0.13.2.jar
breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
Expand Down
4 changes: 2 additions & 2 deletions dev/deps/spark-deps-hadoop-2.7
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.11-0.13.1.jar
breeze_2.11-0.13.1.jar
breeze-macros_2.11-0.13.2.jar
breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ private class AFTAggregator(
val ti = data.label
val delta = data.censor

require(ti > 0.0, "The lifetime or label should be greater than 0.")

val localFeaturesStd = bcFeaturesStd.value

val margin = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,6 @@ class AFTSurvivalRegressionSuite

test("should support all NumericType censors, and not support other types") {
val df = spark.createDataFrame(Seq(
(0, Vectors.dense(0)),
(1, Vectors.dense(1)),
(2, Vectors.dense(2)),
(3, Vectors.dense(3)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@ object MLTestingUtils extends SparkFunSuite {
featuresColName: String = "features",
censorColName: String = "censor"): Map[NumericType, DataFrame] = {
val df = spark.createDataFrame(Seq(
(0, Vectors.dense(0)),
(1, Vectors.dense(1)),
(2, Vectors.dense(2)),
(3, Vectors.dense(3)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ class LBFGSSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers
// With smaller convergenceTol, it takes more steps.
assert(lossLBFGS3.length > lossLBFGS2.length)

// Based on observation, lossLBFGS3 runs 7 iterations, no theoretically guaranteed.
assert(lossLBFGS3.length == 7)
// Based on observation, lossLBFGS3 runs 6 iterations, no theoretically guaranteed.
assert(lossLBFGS3.length == 6)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the iteration count is not theoretically guaranteed, why do we need to keep this test? I remember we have changed this line multiple times when upgrading breeze. What do you think about just removing this line? We never check the number of iterations in other test suites. @srowen @WeichenXu123

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK by me. You could also make it a range. Or something really basic like "> 0".

assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol)
}

Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.binary.version}</artifactId>
<version>0.13.1</version>
<version>0.13.2</version>
<exclusions>
<!-- This is included as a compile-scoped dependency by jtransforms, which is
a dependency of breeze. -->
Expand Down
14 changes: 7 additions & 7 deletions python/pyspark/ml/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1123,20 +1123,20 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> from pyspark.ml.linalg import Vectors
>>> df = spark.createDataFrame([
... (1.0, Vectors.dense(1.0), 1.0),
... (0.0, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"])
... (1e-40, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"])
>>> aftsr = AFTSurvivalRegression()
>>> model = aftsr.fit(df)
>>> model.predict(Vectors.dense(6.3))
1.0
>>> model.predictQuantiles(Vectors.dense(6.3))
DenseVector([0.0101, 0.0513, 0.1054, 0.2877, 0.6931, 1.3863, 2.3026, 2.9957, 4.6052])
>>> model.transform(df).show()
+-----+---------+------+----------+
|label| features|censor|prediction|
+-----+---------+------+----------+
| 1.0| [1.0]| 1.0| 1.0|
| 0.0|(1,[],[])| 0.0| 1.0|
+-----+---------+------+----------+
+-------+---------+------+----------+
| label| features|censor|prediction|
+-------+---------+------+----------+
| 1.0| [1.0]| 1.0| 1.0|
|1.0E-40|(1,[],[])| 0.0| 1.0|
+-------+---------+------+----------+
...
>>> aftsr_path = temp_path + "/aftsr"
>>> aftsr.save(aftsr_path)
Expand Down