Commit 317bde9

committed
use local variable
1 parent 48b2814 commit 317bde9

5 files changed: +43 -33 lines changed
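All five aggregators get the same micro-optimization: instead of updating the class fields `lossSum` and `weightSum` on every iteration of the hot loop, `add` accumulates into a local variable and flushes it into the field once per block. A local is a JVM stack slot the JIT can keep in a register, while the field updates are per-iteration heap writes. A minimal sketch of the pattern, with a hypothetical `Block` type standing in for `InstanceBlock`:

// Minimal sketch of the pattern used in this commit; Block is a
// hypothetical stand-in for InstanceBlock, for illustration only.
final case class Block(labels: Array[Double], weights: Array[Double])

class Aggregator extends Serializable {
  private var lossSum = 0.0
  private var weightSum = 0.0

  def add(block: Block): this.type = {
    val size = block.labels.length
    // Accumulate into a local: a stack slot the JIT can register-allocate,
    // instead of a heap write to the field on every iteration.
    var localLossSum = 0.0
    var i = 0
    while (i < size) {
      localLossSum += block.weights(i) * block.labels(i)  // stand-in loss term
      i += 1
    }
    // Flush into the fields once per block.
    lossSum += localLossSum
    weightSum += block.weights.sum
    this
  }
}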

mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/AFTAggregator.scala

Lines changed: 14 additions & 15 deletions
@@ -197,16 +197,12 @@ private[ml] class BlockAFTAggregator(
   * @return This BlockAFTAggregator object.
   */
  def add(block: InstanceBlock): this.type = {
-    // here use Instance.weight to store censor for convenience
-    val (matrix, labels, censors) = (block.matrix, block.labels, block.weightIter.toArray)
-    require(matrix.isTransposed)
-    require(numFeatures == matrix.numCols, s"Dimensions mismatch when adding new " +
-      s"instance. Expecting $numFeatures but got ${matrix.numCols}.")
-    require(labels.forall(_ > 0.0), "The lifetime or label should be greater than 0.")
-
-    val size = matrix.numRows
-    require(labels.length == size && censors.length == size)
+    require(block.matrix.isTransposed)
+    require(numFeatures == block.numFeatures, s"Dimensions mismatch when adding new " +
+      s"instance. Expecting $numFeatures but got ${block.numFeatures}.")
+    require(block.labels.forall(_ > 0.0), "The lifetime or label should be greater than 0.")

+    val size = block.size
     val intercept = coefficientsArray(dim - 2)
     // sigma is the scale parameter of the AFT model
     val sigma = math.exp(coefficientsArray(dim - 1))
@@ -217,26 +213,30 @@ private[ml] class BlockAFTAggregator(
     } else {
       Vectors.zeros(size).toDense
     }
-    BLAS.gemv(1.0, matrix, linear, 1.0, vec)
+    BLAS.gemv(1.0, block.matrix, linear, 1.0, vec)

     // in-place convert margins to gradient scales
     // then, vec represents gradient scales
+    var localLossSum = 0.0
     var i = 0
     var sigmaGradSum = 0.0
     while (i < size) {
-      val ti = labels(i)
-      val delta = censors(i)
+      val ti = block.getLabel(i)
+      // here use Instance.weight to store censor for convenience
+      val delta = block.getWeight(i)
       val margin = vec(i)
       val epsilon = (math.log(ti) - margin) / sigma
       val expEpsilon = math.exp(epsilon)
-      lossSum += delta * math.log(sigma) - delta * epsilon + expEpsilon
+      localLossSum += delta * math.log(sigma) - delta * epsilon + expEpsilon
       val multiplier = (delta - expEpsilon) / sigma
       vec.values(i) = multiplier
       sigmaGradSum += delta + multiplier * sigma * epsilon
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += size

-    matrix match {
+    block.matrix match {
       case dm: DenseMatrix =>
         BLAS.nativeBLAS.dgemv("N", dm.numCols, dm.numRows, 1.0, dm.values, dm.numCols,
           vec.values, 1, 1.0, gradientSumArray, 1)
@@ -250,7 +250,6 @@ private[ml] class BlockAFTAggregator(

     if (fitIntercept) gradientSumArray(dim - 2) += vec.values.sum
     gradientSumArray(dim - 1) += sigmaGradSum
-    weightSum += size

     this
   }
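Beyond the local loss accumulator, the AFT hunk stops destructuring the block into `(matrix, labels, censors)`, which materialized `block.weightIter.toArray`, and instead indexes the block directly with `getLabel`/`getWeight`. A sketch of the allocation difference, using a hypothetical block with the same accessors:

// Hypothetical block mirroring the InstanceBlock accessors used in the hunk.
class Block(labels: Array[Double], weights: Array[Double]) {
  def size: Int = labels.length
  def getLabel(i: Int): Double = labels(i)
  def getWeight(i: Int): Double = weights(i)
  def weightIter: Iterator[Double] = weights.iterator
}

// Before: copy the weights out of the block, then index the copy.
def censorSumCopying(block: Block): Double = {
  val censors = block.weightIter.toArray  // one extra array per block
  var s = 0.0
  var i = 0
  while (i < censors.length) { s += censors(i); i += 1 }
  s
}

// After: index the block directly; no intermediate allocation.
def censorSumDirect(block: Block): Double = {
  var s = 0.0
  var i = 0
  while (i < block.size) { s += block.getWeight(i); i += 1 }
  s
}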

mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala

Lines changed: 4 additions & 2 deletions
@@ -162,24 +162,26 @@ private[ml] class BlockHingeAggregator(

     // in-place convert dotProducts to gradient scales
     // then, vec represents gradient scales
+    var localLossSum = 0.0
     var i = 0
     while (i < size) {
       val weight = block.getWeight(i)
       if (weight > 0) {
-        weightSum += weight
         // Our loss function with {0, 1} labels is max(0, 1 - (2y - 1) (f_w(x)))
         // Therefore the gradient is -(2y - 1)*x
         val label = block.getLabel(i)
         val labelScaled = label + label - 1.0
         val loss = (1.0 - labelScaled * vec(i)) * weight
         if (loss > 0) {
-          lossSum += loss
+          localLossSum += loss
           val gradScale = -labelScaled * weight
           vec.values(i) = gradScale
         } else { vec.values(i) = 0.0 }
       } else { vec.values(i) = 0.0 }
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += block.weightIter.sum

     // predictions are all correct, no gradient signal
     if (vec.values.forall(_ == 0)) return this
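One behavioral subtlety: `weightSum` is now updated once with `block.weightIter.sum` instead of per positive weight inside the loop. The totals agree because the skipped weights are exactly zero; non-negativity is enforced by the `require` checks visible in the Huber and LeastSquares hunks below. A tiny equivalence check:

val weights = Array(0.0, 1.5, 0.0, 2.5)

// Old form: field updated per positive weight inside the loop.
var perElement = 0.0
for (w <- weights if w > 0) perElement += w

// New form: one bulk update; zero weights contribute nothing.
val bulk = weights.sum

assert(perElement == bulk)  // both 4.0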

mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HuberAggregator.scala

Lines changed: 7 additions & 4 deletions
@@ -167,7 +167,6 @@ private[ml] class BlockHuberAggregator(

   protected override val dim: Int = bcParameters.value.size
   private val numFeatures = if (fitIntercept) dim - 2 else dim - 1
-  private val sigma = bcParameters.value(dim - 1)
   private val intercept = if (fitIntercept) bcParameters.value(dim - 2) else 0.0
   // make transient so we do not serialize between aggregation stages
   @transient private lazy val linear = Vectors.dense(bcParameters.value.toArray.take(numFeatures))
@@ -187,7 +186,9 @@ private[ml] class BlockHuberAggregator(
       s"instance weights ${block.weightIter.mkString("[", ",", "]")} has to be >= 0.0")

     if (block.weightIter.forall(_ == 0)) return this
+
     val size = block.size
+    val sigma = bcParameters.value(dim - 1)

     // vec here represents margins or dotProducts
     val vec = if (fitIntercept) {
@@ -200,23 +201,23 @@ private[ml] class BlockHuberAggregator(
     // in-place convert margins to multipliers
     // then, vec represents multipliers
     var sigmaGradSum = 0.0
+    var localLossSum = 0.0
     var i = 0
     while (i < size) {
       val weight = block.getWeight(i)
       if (weight > 0) {
-        weightSum += weight
         val label = block.getLabel(i)
         val margin = vec(i)
         val linearLoss = label - margin

         if (math.abs(linearLoss) <= sigma * epsilon) {
-          lossSum += 0.5 * weight * (sigma + math.pow(linearLoss, 2.0) / sigma)
+          localLossSum += 0.5 * weight * (sigma + math.pow(linearLoss, 2.0) / sigma)
           val linearLossDivSigma = linearLoss / sigma
           val multiplier = -1.0 * weight * linearLossDivSigma
           vec.values(i) = multiplier
           sigmaGradSum += 0.5 * weight * (1.0 - math.pow(linearLossDivSigma, 2.0))
         } else {
-          lossSum += 0.5 * weight *
+          localLossSum += 0.5 * weight *
             (sigma + 2.0 * epsilon * math.abs(linearLoss) - sigma * epsilon * epsilon)
           val sign = if (linearLoss >= 0) -1.0 else 1.0
           val multiplier = weight * sign * epsilon
@@ -226,6 +227,8 @@ private[ml] class BlockHuberAggregator(
       } else { vec.values(i) = 0.0 }
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += block.weightIter.sum

     block.matrix match {
       case dm: DenseMatrix =>
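The Huber hunks also demote `sigma` from a constructor-time field to a local read inside `add`. As a field it is evaluated on the driver and serialized with the aggregator; as a local it is re-read from the broadcast on each call and never becomes part of the serialized state, in the spirit of the nearby "make transient so we do not serialize" comment. A minimal sketch, assuming a `Broadcast[Array[Double]]` parameter vector:

import org.apache.spark.broadcast.Broadcast

// Sketch only: field vs. local for a broadcast-derived value.
class AggBefore(bcParameters: Broadcast[Array[Double]]) extends Serializable {
  // Evaluated at construction and serialized with the instance.
  private val sigma = bcParameters.value.last
  def scale(x: Double): Double = x / sigma
}

class AggAfter(bcParameters: Broadcast[Array[Double]]) extends Serializable {
  def scale(x: Double): Double = {
    // Read from the broadcast inside the method; held only in a local.
    val sigma = bcParameters.value.last
    x / sigma
  }
}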

mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LeastSquaresAggregator.scala

Lines changed: 6 additions & 5 deletions
@@ -267,9 +267,6 @@ private[ml] class BlockLeastSquaresAggregator(
     val offset = if (fitIntercept) labelMean / labelStd - sum else 0.0
     (Vectors.dense(coefficientsArray), offset)
   }
-  // do not use tuple assignment above because it will circumvent the @transient tag
-  @transient private lazy val effectiveCoefficientsVec = effectiveCoefAndOffset._1
-  @transient private lazy val offset = effectiveCoefAndOffset._2

   /**
    * Add a new training instance block to this BlockLeastSquaresAggregator, and update the loss
@@ -286,24 +283,28 @@ private[ml] class BlockLeastSquaresAggregator(
       s"instance weights ${block.weightIter.mkString("[", ",", "]")} has to be >= 0.0")

     if (block.weightIter.forall(_ == 0)) return this
+
     val size = block.size
+    val (effectiveCoefficientsVec, offset) = effectiveCoefAndOffset

     // vec here represents diffs
     val vec = new DenseVector(Array.tabulate(size)(i => offset - block.getLabel(i) / labelStd))
     BLAS.gemv(1.0, block.matrix, effectiveCoefficientsVec, 1.0, vec)

     // in-place convert diffs to multipliers
     // then, vec represents multipliers
+    var localLossSum = 0.0
     var i = 0
     while (i < size) {
       val weight = block.getWeight(i)
       val diff = vec(i)
-      lossSum += weight * diff * diff / 2
-      weightSum += weight
+      localLossSum += weight * diff * diff / 2
       val multiplier = weight * diff
       vec.values(i) = multiplier
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += block.weightIter.sum

     val gradSumVec = new DenseVector(gradientSumArray)
     BLAS.gemv(1.0, block.matrix.transpose, vec, 1.0, gradSumVec)
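The LeastSquares hunks likewise drop the two `@transient lazy val` projections of `effectiveCoefAndOffset`. The deleted comment explains why they existed: a class-level tuple assignment would desugar into fields that circumvent the `@transient` tag. Destructuring inside `add` sidesteps the problem entirely, since locals are never serialized. A minimal sketch:

class Agg extends Serializable {
  // Recomputed lazily on each executor, never serialized.
  @transient private lazy val coefAndOffset: (Array[Double], Double) =
    (Array(1.0, 2.0), 0.5)  // placeholder values

  def add(x: Double): Double = {
    // Destructure into locals inside the method. A class-level
    // `val (coef, offset) = coefAndOffset` would desugar into extra
    // fields, defeating the @transient annotation above.
    val (coef, offset) = coefAndOffset
    coef.sum * x + offset
  }
}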

mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala

Lines changed: 12 additions & 7 deletions
@@ -466,24 +466,26 @@ private[ml] class BlockLogisticAggregator(

     // in-place convert margins to multiplier
     // then, vec represents multiplier
+    var localLossSum = 0.0
     var i = 0
     while (i < size) {
       val weight = block.getWeight(i)
       if (weight > 0) {
-        weightSum += weight
         val label = block.getLabel(i)
         val margin = vec(i)
         if (label > 0) {
           // The following is equivalent to log(1 + exp(margin)) but more numerically stable.
-          lossSum += weight * Utils.log1pExp(margin)
+          localLossSum += weight * Utils.log1pExp(margin)
         } else {
-          lossSum += weight * (Utils.log1pExp(margin) - margin)
+          localLossSum += weight * (Utils.log1pExp(margin) - margin)
         }
         val multiplier = weight * (1.0 / (1.0 + math.exp(margin)) - label)
         vec.values(i) = multiplier
       } else { vec.values(i) = 0.0 }
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += block.weightIter.sum

     // predictions are all correct, no gradient signal
     if (vec.values.forall(_ == 0)) return
@@ -514,10 +516,11 @@ private[ml] class BlockLogisticAggregator(
     // mat here represents margins, shape: S X C
     val mat = DenseMatrix.zeros(size, numClasses)
     if (fitIntercept) {
+      val localCoefficientsArray = coefficientsArray
       val offset = numClasses * numFeatures
       var j = 0
       while (j < numClasses) {
-        val intercept = coefficientsArray(offset + j)
+        val intercept = localCoefficientsArray(offset + j)
         var i = 0
         while (i < size) { mat.update(i, j, intercept); i += 1 }
         j += 1
@@ -527,13 +530,13 @@ private[ml] class BlockLogisticAggregator(

     // in-place convert margins to multipliers
     // then, mat represents multipliers
+    var localLossSum = 0.0
     var i = 0
     val tmp = Array.ofDim[Double](numClasses)
     val interceptGradSumArr = if (fitIntercept) Array.ofDim[Double](numClasses) else null
     while (i < size) {
       val weight = block.getWeight(i)
       if (weight > 0) {
-        weightSum += weight
         val label = block.getLabel(i)

         var maxMargin = Double.NegativeInfinity
@@ -566,15 +569,17 @@ private[ml] class BlockLogisticAggregator(
         }

         if (maxMargin > 0) {
-          lossSum += weight * (math.log(sum) - marginOfLabel + maxMargin)
+          localLossSum += weight * (math.log(sum) - marginOfLabel + maxMargin)
         } else {
-          lossSum += weight * (math.log(sum) - marginOfLabel)
+          localLossSum += weight * (math.log(sum) - marginOfLabel)
         }
       } else {
         var j = 0; while (j < numClasses) { mat.update(i, j, 0.0); j += 1 }
       }
       i += 1
     }
+    lossSum += localLossSum
+    weightSum += block.weightIter.sum

     // mat (multipliers): S X C, dense N
     // mat.transpose (multipliers): C X S, dense T
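In the multinomial path, `coefficientsArray` is additionally copied into `localCoefficientsArray` before the intercept loop. Assuming `coefficientsArray` is a `lazy val` backed by the broadcast coefficients, as in the other block aggregators, every read goes through the lazy-initialization accessor; hoisting the reference into a local pays that cost once rather than once per iteration. A sketch of the hoist:

class Agg {
  // Assumed shape: a lazy val backed by broadcast coefficients.
  @transient private lazy val coefficientsArray: Array[Double] = Array.fill(16)(1.0)

  def fill(mat: Array[Double]): Unit = {
    val localCoefficientsArray = coefficientsArray  // one accessor call
    var i = 0
    while (i < mat.length) {
      // Plain array read from the local; no lazy-val initialized
      // check on each iteration.
      mat(i) = localCoefficientsArray(i % 16)
      i += 1
    }
  }
}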
