From 83734242ee474c8fa74e5f495676a829bd99b196 Mon Sep 17 00:00:00 2001 From: lee19 Date: Tue, 23 Jun 2015 22:23:47 +0900 Subject: [PATCH 1/4] [SPARK-8563][MLlib] Fixed a bug so that IndexedRowMatrix.computeSVD().U.numCols = k --- .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index 3be530fa0753..b3d1ed41ac80 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -146,7 +146,7 @@ class IndexedRowMatrix( val indexedRows = indices.zip(svd.U.rows).map { case (i, v) => IndexedRow(i, v) } - new IndexedRowMatrix(indexedRows, nRows, nCols) + new IndexedRowMatrix(indexedRows, svd.U.numRows(), svd.U.numCols()) } else { null } From c2ccd89cec503a09073d84503064afaf211b9fc8 Mon Sep 17 00:00:00 2001 From: lee19 Date: Tue, 23 Jun 2015 22:24:12 +0900 Subject: [PATCH 2/4] Added a unit test that validates matrix sizes of svd for [SPARK-8563][MLlib] --- .../linalg/distributed/IndexedRowMatrixSuite.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala index 4a7b99a976f0..7bb2f8fc9091 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala @@ -135,6 +135,17 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(closeToZero(U * brzDiag(s) * V.t - localA)) } + test("validate matrix sizes of svd") { + val k = 2 + val A = new 
IndexedRowMatrix(indexedRows) + val svd = A.computeSVD(k, computeU = true) + assert(svd.U.numRows() === m) + assert(svd.U.numCols() === k) + assert(svd.s.size === k) + assert(svd.V.numRows() === n) + assert(svd.V.numCols() === k) + } + test("validate k in svd") { val A = new IndexedRowMatrix(indexedRows) intercept[IllegalArgumentException] { From 4b9803bdad08537ad40ccd0efcc69333c5c4eb34 Mon Sep 17 00:00:00 2001 From: lee19 Date: Fri, 26 Jun 2015 16:28:37 +0900 Subject: [PATCH 3/4] [SPARK-8563] [MLlib] Fixed a build error. --- .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 2 +- .../mllib/linalg/distributed/IndexedRowMatrixSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index b3d1ed41ac80..b447441890d4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -146,7 +146,7 @@ class IndexedRowMatrix( val indexedRows = indices.zip(svd.U.rows).map { case (i, v) => IndexedRow(i, v) } - new IndexedRowMatrix(indexedRows, svd.U.numRows(), svd.U.numCols()) + new IndexedRowMatrix(indexedRows, svd.U.numRows(), svd.U.numCols().toInt) } else { null } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala index 7bb2f8fc9091..0ecb7a221a50 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala @@ -142,8 +142,8 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(svd.U.numRows() === m) assert(svd.U.numCols() === 
k) assert(svd.s.size === k) - assert(svd.V.numRows() === n) - assert(svd.V.numCols() === k) + assert(svd.V.numRows === n) + assert(svd.V.numCols === k) } test("validate k in svd") { From c1812a0a8a79629c16ff4cfe167c096f548a4764 Mon Sep 17 00:00:00 2001 From: lee19 Date: Tue, 30 Jun 2015 23:38:15 +0900 Subject: [PATCH 4/4] [SPARK-8563] [MLlib] Used nRows instead of svd.U.numRows() to avoid unnecessary overhead. --- .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index b447441890d4..1c33b43ea7a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -146,7 +146,7 @@ class IndexedRowMatrix( val indexedRows = indices.zip(svd.U.rows).map { case (i, v) => IndexedRow(i, v) } - new IndexedRowMatrix(indexedRows, svd.U.numRows(), svd.U.numCols().toInt) + new IndexedRowMatrix(indexedRows, nRows, svd.U.numCols().toInt) } else { null }