From 45ee9b7cccf8ecb25647df5d2deb819caddab26a Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Sat, 21 Jun 2014 23:10:47 +0100
Subject: [PATCH] Add simple note that data need not be centered for computePrincipalComponents

---
 .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 1a0073c9d487..695e03b736ba 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -347,6 +347,8 @@ class RowMatrix(
    * The principal components are stored a local matrix of size n-by-k.
    * Each column corresponds for one principal component,
    * and the columns are in descending order of component variance.
+   * The row data do not need to be "centered" first; it is not necessary for
+   * the mean of each column to be 0.
    *
    * @param k number of top principal components.
    * @return a matrix of size n-by-k, whose columns are principal components