Skip to content

Commit 45dda92

Browse files
yinxusen authored and srowen committed
[SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix
## What changes were proposed in this pull request? The following Java code fails because of type erasure: ```Java JavaRDD<Vector> rows = jsc.parallelize(...); RowMatrix mat = new RowMatrix(rows.rdd()); QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true); ``` We should use retag to restore the element type and prevent the following exception: ```Java java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector; ``` ## How was this patch tested? Java unit test Author: Xusen Yin <[email protected]> Closes #14051 from yinxusen/SPARK-16372. (cherry picked from commit 4c6f00d) Signed-off-by: Sean Owen <[email protected]>
1 parent 2588776 commit 45dda92

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ private[python] class PythonMLLibAPI extends Serializable {
11101110
* Wrapper around RowMatrix constructor.
11111111
*/
11121112
def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = {
1113-
new RowMatrix(rows.rdd.retag(classOf[Vector]), numRows, numCols)
1113+
new RowMatrix(rows.rdd, numRows, numCols)
11141114
}
11151115

11161116
/**

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ class RowMatrix @Since("1.0.0") (
526526
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
527527
val col = numCols().toInt
528528
// split rows horizontally into smaller matrices, and compute QR for each of them
529-
val blockQRs = rows.glom().map { partRows =>
529+
val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows =>
530530
val bdm = BDM.zeros[Double](partRows.length, col)
531531
var i = 0
532532
partRows.foreach { row =>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.mllib.linalg.distributed;
19+
20+
import java.util.Arrays;
21+
22+
import org.junit.Test;
23+
24+
import org.apache.spark.SharedSparkSession;
25+
import org.apache.spark.api.java.JavaRDD;
26+
import org.apache.spark.mllib.linalg.Matrix;
27+
import org.apache.spark.mllib.linalg.QRDecomposition;
28+
import org.apache.spark.mllib.linalg.Vector;
29+
import org.apache.spark.mllib.linalg.Vectors;
30+
31+
public class JavaRowMatrixSuite extends SharedSparkSession {
32+
33+
@Test
34+
public void rowMatrixQRDecomposition() {
35+
Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
36+
Vector v2 = Vectors.dense(2.0, 20.0, 200.0);
37+
Vector v3 = Vectors.dense(3.0, 30.0, 300.0);
38+
39+
JavaRDD<Vector> rows = jsc.parallelize(Arrays.asList(v1, v2, v3), 1);
40+
RowMatrix mat = new RowMatrix(rows.rdd());
41+
42+
QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
43+
}
44+
}

0 commit comments

Comments
 (0)