From b4fd733a708feb63254728f3af1fd20ee892d085 Mon Sep 17 00:00:00 2001 From: VinceShieh Date: Mon, 8 May 2017 09:54:30 +0800 Subject: [PATCH] [SPARK-17134][ML] Use level 2 BLAS operations in LogisticAggregator Multinomial logistic regression uses LogisticAggregator class for gradient updates. This PR refactors MLOR to use level 2 BLAS operations for the updates. Signed-off-by: VinceShieh --- .../classification/LogisticRegression.scala | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 42dc7fbebe4c3..7427b8090f944 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -23,6 +23,7 @@ import scala.collection.mutable import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.hadoop.fs.Path import org.apache.spark.SparkException @@ -1722,25 +1723,22 @@ private class LogisticAggregator( var maxMargin = Double.NegativeInfinity val margins = new Array[Double](numClasses) + val featureStdArray = new Array[Double](features.size) features.foreachActive { (index, value) => - val stdValue = value / localFeaturesStd(index) - var j = 0 - while (j < numClasses) { - margins(j) += localCoefficients(index * numClasses + j) * stdValue - j += 1 - } + featureStdArray(index) = value / localFeaturesStd(index) } - var i = 0 - while (i < numClasses) { - if (fitIntercept) { + + blas.dgemv("N", numCoefficientSets, numFeatures, 1.0, coefficientsArray, + numCoefficientSets, featureStdArray, 1, 1.0, margins, 1) + if (fitIntercept) { + var i = 0 + while (i < numClasses) { margins(i) += localCoefficients(numClasses * numFeatures + i) + i += 1 } - if (i == label.toInt) marginOfLabel = margins(i) - if (margins(i) > maxMargin) { - maxMargin = margins(i) - } - i += 1 } + marginOfLabel = margins(label.toInt) + maxMargin = margins.max /** * When maxMargin is greater than 0, the original formula could cause overflow. @@ -1764,17 +1762,10 @@ private class LogisticAggregator( margins.indices.foreach { i => multipliers(i) = multipliers(i) / sum - (if (label == i) 1.0 else 0.0) } - features.foreachActive { (index, value) => - if (localFeaturesStd(index) != 0.0 && value != 0.0) { - val stdValue = value / localFeaturesStd(index) - var j = 0 - while (j < numClasses) { - localGradientArray(index * numClasses + j) += - weight * multipliers(j) * stdValue - j += 1 - } - } - } + + blas.dger(numCoefficientSets, numFeatures, weight, multipliers, + 1, featureStdArray, 1, localGradientArray, numCoefficientSets) + if (fitIntercept) { var i = 0 while (i < numClasses) {