From f3f7e650455600c0d266395ac549090b959ada95 Mon Sep 17 00:00:00 2001 From: rahul Date: Thu, 1 Oct 2015 18:19:04 -0700 Subject: [PATCH 1/6] updated SparseMatrix equal method --- .../main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index c02ba426fcc3..7a8fa03ea156 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -550,7 +550,10 @@ class SparseMatrix @Since("1.3.0") ( values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => + val thisIteratorSet = toBreeze.activeIterator.toSet + val mIteratorSet = m.toBreeze.activeIterator.toSet.filter(p => p._2 != 0.0) + thisIteratorSet == mIteratorSet case _ => false } From f9c6ac0795919afedc85d66f246abf933dc0d092 Mon Sep 17 00:00:00 2001 From: rahul Date: Fri, 9 Oct 2015 02:28:24 -0700 Subject: [PATCH 2/6] updated SparseMatrix equal method to use iterator traversal. --- .../apache/spark/mllib/linalg/Matrices.scala | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 7a8fa03ea156..6ee970afc50c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -550,10 +550,19 @@ class SparseMatrix @Since("1.3.0") ( values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => - val thisIteratorSet = toBreeze.activeIterator.toSet - val mIteratorSet = m.toBreeze.activeIterator.toSet.filter(p => p._2 != 0.0) - thisIteratorSet == mIteratorSet + case m : Matrix => + if (this.numRows != m.numRows || this.numCols != m.numCols) return false + if (this.numNonzeros != m.numNonzeros) return false + val activeIterator = toBreeze.activeIterator + m match { + case s: SparseMatrix => s.toBreeze.activeIterator.sameElements(activeIterator) + case d: DenseMatrix => + while(activeIterator.hasNext){ + val next = activeIterator.next() + if(d.apply(next._1._1, next._1._2) != next._2) return false + } + true + } case _ => false } From 70016e465c68df1c566e2f5eb44ee19571321312 Mon Sep 17 00:00:00 2001 From: rahul Date: Fri, 9 Oct 2015 17:33:19 -0700 Subject: [PATCH 3/6] Updated tests to include 3x3 matrix with more zero elements. --- .../scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index bfd6d5495f5e..bda649e1d3e1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -90,6 +90,12 @@ class MatricesSuite extends SparkFunSuite { val sm2 = dm2.asInstanceOf[DenseMatrix].toSparse assert(sm1 === sm2.transpose) assert(sm1 === dm2.transpose) + + val dmEye = Matrices.dense(3, 3, Array(1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0)) + val smEye = dmEye.asInstanceOf[DenseMatrix].toSparse + assert(smEye == smEye) + assert(smEye == dmEye) + assert(smEye != smEye.transpose) } test("matrix copies are deep copies") { From 2fff7d74f95242f208874d4f5059e5d86198b8a5 Mon Sep 17 00:00:00 2001 From: rahul Date: Fri, 9 Oct 2015 17:38:06 -0700 Subject: [PATCH 4/6] Added tests with identity matrices --- .../spark/mllib/linalg/MatricesSuite.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index bda649e1d3e1..2249d8e5a587 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -91,11 +91,17 @@ class MatricesSuite extends SparkFunSuite { assert(sm1 === sm2.transpose) assert(sm1 === dm2.transpose) - val dmEye = Matrices.dense(3, 3, Array(1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0)) - val smEye = dmEye.asInstanceOf[DenseMatrix].toSparse - assert(smEye == smEye) - assert(smEye == dmEye) - assert(smEye != smEye.transpose) + val dm3 = Matrices.dense(3, 3, Array(1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0)) + val sm3 = dm3.asInstanceOf[DenseMatrix].toSparse + assert(sm3 === sm3) + assert(sm3 === dm3) + assert(sm3 !== sm3.transpose) + + val dmEye = Matrices.eye(10) + val smEye = Matrices.speye(10) + assert(smEye === smEye) + assert(smEye === dmEye) + assert(smEye === smEye.transpose) } test("matrix copies are deep copies") { From 7f3f888ff8eb02cdfcbf0f9cad359173154df413 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Fri, 9 Oct 2015 19:05:34 -0700 Subject: [PATCH 5/6] unit test which fails in MatricesSuite equals --- .../scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 2249d8e5a587..529150fd17c3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -97,6 +97,10 @@ class MatricesSuite extends SparkFunSuite { assert(sm3 === dm3) assert(sm3 !== sm3.transpose) + val sm3explicit = Matrices.sparse(3, 3, colPtrs = Array(0, 3, 6, 9), + rowIndices = Array(0, 1, 2, 0, 1, 2, 0, 1, 2), values = dm3.toArray) + assert(sm3 === sm3explicit) + val dmEye = Matrices.eye(10) val smEye = Matrices.speye(10) assert(smEye === smEye) From b3c29cab43a21dd9626118ca16d0bd7ba2d2ddd7 Mon Sep 17 00:00:00 2001 From: rahul Date: Fri, 16 Oct 2015 17:44:53 -0700 Subject: [PATCH 6/6] Updated to handle explicit and implicit zero comparison. Added corresponding tests. --- .../apache/spark/mllib/linalg/Matrices.scala | 5 +++- .../spark/mllib/linalg/MatricesSuite.scala | 23 ++++++++++--------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 6ee970afc50c..5439b3acc30d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -555,7 +555,10 @@ class SparseMatrix @Since("1.3.0") ( if (this.numNonzeros != m.numNonzeros) return false val activeIterator = toBreeze.activeIterator m match { - case s: SparseMatrix => s.toBreeze.activeIterator.sameElements(activeIterator) + case s: SparseMatrix => + val filterIter = s.toBreeze.activeIterator.withFilter(_._2 != 0.0) + val currFilterIter = activeIterator.withFilter(_._2 != 0.0) + filterIter.sameElements(currFilterIter) case d: DenseMatrix => while(activeIterator.hasNext){ val next = activeIterator.next() diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 2249d8e5a587..143c9936c8bb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -82,6 +82,9 @@ class MatricesSuite extends SparkFunSuite { val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0)) assert(dm1 === dm2.transpose) + val dmz = Matrices.dense(2, 2, Array(0.0, 0.0, 0.0, 0.0)) + assert(dmz === dmz.transpose) + val sm1 = dm1.asInstanceOf[DenseMatrix].toSparse assert(sm1 === sm1) assert(sm1 === dm1) @@ -91,17 +94,15 @@ class MatricesSuite extends SparkFunSuite { assert(sm1 === sm2.transpose) assert(sm1 === dm2.transpose) - val dm3 = Matrices.dense(3, 3, Array(1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0)) - val sm3 = dm3.asInstanceOf[DenseMatrix].toSparse - assert(sm3 === sm3) - assert(sm3 === dm3) - assert(sm3 !== sm3.transpose) - - val dmEye = Matrices.eye(10) - val smEye = Matrices.speye(10) - assert(smEye === smEye) - assert(smEye === dmEye) - assert(smEye === smEye.transpose) + val smz = dmz.asInstanceOf[DenseMatrix].toSparse + val sm2z = dm2.asInstanceOf[DenseMatrix].toSparse + sm2z(0,1) = 0.0 + sm2z(1,0) = 0.0 + sm2z(1,1) = 0.0 + assert(smz === sm2z) + assert(smz === dmz) + assert(sm2z === dmz) + assert(sm2z === smz) } test("matrix copies are deep copies") {