Skip to content

Commit b24d3db

Browse files
lu-wang-dlmengxr
authored and committed
[SPARK-24290][ML] add support for Array input for instrumentation.logNamedValue
## What changes were proposed in this pull request?

Extend instrumentation.logNamedValue to support Array input, and change the logging for "clusterSizes" to use the new method.

## How was this patch tested?

N/A

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Lu WANG <[email protected]>

Closes #21347 from ludatabricks/SPARK-24290.
1 parent 7297ae0 commit b24d3db

File tree

4 files changed

+16
-6
lines changed

4 files changed

+16
-6
lines changed

mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,7 @@ class BisectingKMeans @Since("2.0.0") (
276276
val summary = new BisectingKMeansSummary(
277277
model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
278278
model.setSummary(Some(summary))
279-
// TODO: need to extend logNamedValue to support Array
280-
instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
279+
instr.logNamedValue("clusterSizes", summary.clusterSizes)
281280
instr.logSuccess(model)
282281
model
283282
}

mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,8 +426,7 @@ class GaussianMixture @Since("2.0.0") (
426426
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood)
427427
model.setSummary(Some(summary))
428428
instr.logNamedValue("logLikelihood", logLikelihood)
429-
// TODO: need to extend logNamedValue to support Array
430-
instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
429+
instr.logNamedValue("clusterSizes", summary.clusterSizes)
431430
instr.logSuccess(model)
432431
model
433432
}

mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,7 @@ class KMeans @Since("1.5.0") (
359359
model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
360360

361361
model.setSummary(Some(summary))
362-
// TODO: need to extend logNamedValue to support Array
363-
instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
362+
instr.logNamedValue("clusterSizes", summary.clusterSizes)
364363
instr.logSuccess(model)
365364
if (handlePersistence) {
366365
instances.unpersist()

mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,19 @@ private[spark] class Instrumentation[E <: Estimator[_]] private (
132132
log(compact(render(name -> value)))
133133
}
134134

135+
def logNamedValue(name: String, value: Array[String]): Unit = {
136+
log(compact(render(name -> compact(render(value.toSeq)))))
137+
}
138+
139+
def logNamedValue(name: String, value: Array[Long]): Unit = {
140+
log(compact(render(name -> compact(render(value.toSeq)))))
141+
}
142+
143+
def logNamedValue(name: String, value: Array[Double]): Unit = {
144+
log(compact(render(name -> compact(render(value.toSeq)))))
145+
}
146+
147+
135148
/**
136149
* Logs the successful completion of the training session.
137150
*/

0 commit comments

Comments
 (0)