From 1b1a0c6c92fc94bc31b8060e1b1f834db20c9d21 Mon Sep 17 00:00:00 2001
From: Yuhao Yang <hhbyyh@gmail.com>
Date: Fri, 4 Dec 2015 09:36:47 -0500
Subject: [PATCH 1/3] avoid lazy view when slicing features in ChiSqTest

---
 .../org/apache/spark/mllib/stat/test/ChiSqTest.scala     | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index 23c8d7c7c8075..63ba1ee7ee975 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -109,9 +109,10 @@ private[stat] object ChiSqTest extends Logging {
           }
           i += 1
           distinctLabels += label
-          features.toArray.view.zipWithIndex.slice(startCol, endCol).map { case (feature, col) =>
-            allDistinctFeatures(col) += feature
-            (col, feature, label)
+          features.toArray.slice(startCol, endCol).zip(startCol until endCol).map {
+            case (feature, col) =>
+              allDistinctFeatures(col) += feature
+              (col, feature, label)
           }
         }
       }.countByValue()
@@ -122,7 +123,7 @@ private[stat] object ChiSqTest extends Logging {
           pairCounts.keys.filter(_._1 == startCol).map(_._3).toArray.distinct.zipWithIndex.toMap
       }
       val numLabels = labels.size
-      pairCounts.keys.groupBy(_._1).map { case (col, keys) =>
+      pairCounts.keys.groupBy(_._1).foreach { case (col, keys) =>
         val features = keys.map(_._2).toArray.distinct.zipWithIndex.toMap
         val numRows = features.size
         val contingency = new BDM(numRows, numLabels, new Array[Double](numRows * numLabels))

From 8d8327d7f828ed865a103509f361292a69402129 Mon Sep 17 00:00:00 2001
From: Yuhao Yang <hhbyyh@gmail.com>
Date: Mon, 7 Dec 2015 17:22:26 +0800
Subject: [PATCH 2/3] optimize loop: index features by column instead of slice and zip

---
 .../org/apache/spark/mllib/stat/test/ChiSqTest.scala     | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index 63ba1ee7ee975..57bf32e927970 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -109,10 +109,11 @@ private[stat] object ChiSqTest extends Logging {
           }
           i += 1
           distinctLabels += label
-          features.toArray.slice(startCol, endCol).zip(startCol until endCol).map {
-            case (feature, col) =>
-              allDistinctFeatures(col) += feature
-              (col, feature, label)
+          val featureArray = features.toArray
+          (startCol until endCol).map { col =>
+            val feature = featureArray(col)
+            allDistinctFeatures(col) += feature
+            (col, feature, label)
           }
         }
       }.countByValue()

From a709f49a751aa07ccbefdd6a44a5c1afa4b57f35 Mon Sep 17 00:00:00 2001
From: Yuhao Yang <hhbyyh@gmail.com>
Date: Tue, 12 Jan 2016 10:04:19 +0800
Subject: [PATCH 3/3] use toBreeze for features

---
 .../scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index 907c88a17f5e1..4a3fb06469818 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -109,9 +109,9 @@ private[stat] object ChiSqTest extends Logging {
           }
           i += 1
           distinctLabels += label
-          val featureArray = features.toArray
+          val brzFeatures = features.toBreeze
           (startCol until endCol).map { col =>
-            val feature = featureArray(col)
+            val feature = brzFeatures(col)
             allDistinctFeatures(col) += feature
             (col, feature, label)
           }