From 50eee83d39d146df83d4aa9d76a1cad49669f9b1 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Fri, 6 Nov 2015 17:32:37 +0800 Subject: [PATCH 1/2] add compact in Matrices fromBreeze --- .../org/apache/spark/mllib/linalg/Matrices.scala | 1 + .../apache/spark/mllib/linalg/MatricesSuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 8879dcf75c9b..7403d60b6c69 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -879,6 +879,7 @@ object Matrices { case dm: BDM[Double] => new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => + sm.compact() // There is no isTranspose flag for sparse matrices in Breeze new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) case _ => diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 1833cf383367..c990ed811091 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.linalg import java.util.Random +import breeze.linalg.{CSCMatrix, Matrix => BM} import org.mockito.Mockito.when import org.scalatest.mock.MockitoSugar._ import scala.collection.mutable.{Map => MutableMap} @@ -494,4 +495,15 @@ class MatricesSuite extends SparkFunSuite { assert(sm1.numNonzeros === 1) assert(sm1.numActives === 3) } + + test("fromBreeze with sparse matrix") { + // colPtr.last does NOT always equal to values.length in breeze SCSMatrix and + // invocation of compact() is necessary. Refer to SPARK-11507 + val bm1: BM[Double] = new CSCMatrix[Double] ( + Array (1.0, 1, 1), 3, 3, Array (0, 1, 2, 3), Array (0, 1, 2) ) + val bm2: BM[Double] = new CSCMatrix[Double] ( + Array (1.0, 2, 2, 4), 3, 3, Array (0, 0, 2, 4), Array (1, 2, 1, 2) ) + val sum = bm1 + bm2 + Matrices.fromBreeze(sum) + } } From 75181e4712b564354e9f8e72d07cdae592180772 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Tue, 22 Mar 2016 10:47:31 -0400 Subject: [PATCH 2/2] update comments --- .../scala/org/apache/spark/mllib/linalg/Matrices.scala | 7 +++++-- .../org/apache/spark/mllib/linalg/MatricesSuite.scala | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 3b5e80ef603a..15ea82135378 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -940,11 +940,14 @@ object Matrices { case dm: BDM[Double] => new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => - val mat = if(sm.colPtrs.last != sm.data.length) { + // Spark-11507. work around breeze issue 479. + val mat = if (sm.colPtrs.last != sm.data.length) { val matCopy = sm.copy matCopy.compact() matCopy - } else sm + } else { + sm + } // There is no isTranspose flag for sparse matrices in Breeze new SparseMatrix(mat.rows, mat.cols, mat.colPtrs, mat.rowIndices, mat.data) case _ => diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 84aa90cacf57..e9975d6b4832 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -498,7 +498,7 @@ class MatricesSuite extends SparkFunSuite { test("fromBreeze with sparse matrix") { // colPtr.last does NOT always equal to values.length in breeze SCSMatrix and - // invocation of compact() is necessary. Refer to SPARK-11507 + // invocation of compact() may be necessary. Refer to SPARK-11507 val bm1: BM[Double] = new CSCMatrix[Double]( Array(1.0, 1, 1), 3, 3, Array(0, 1, 2, 3), Array(0, 1, 2)) val bm2: BM[Double] = new CSCMatrix[Double](