Skip to content

Commit 96072c3

Browse files
committed
merge master
2 parents 135f741 + 0bb15f2 commit 96072c3

File tree

41 files changed

+309
-222
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+309
-222
lines changed

core/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
<name>Spark Project Core</name>
3535
<url>http://spark.apache.org/</url>
3636
<dependencies>
37+
<dependency>
38+
<groupId>com.google.guava</groupId>
39+
<artifactId>guava</artifactId>
40+
</dependency>
3741
<dependency>
3842
<groupId>com.twitter</groupId>
3943
<artifactId>chill_${scala.binary.version}</artifactId>

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql
1919

2020
import org.apache.spark.{SparkConf, SparkContext}
2121
import org.apache.spark.sql.SQLContext
22-
import org.apache.spark.sql.api.scala.dsl._
22+
import org.apache.spark.sql.Dsl._
2323

2424
// One method for defining the schema of an RDD is to make a case class with the desired column
2525
// names and types.

graphx/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@
4040
<artifactId>spark-core_${scala.binary.version}</artifactId>
4141
<version>${project.version}</version>
4242
</dependency>
43+
<dependency>
44+
<groupId>com.google.guava</groupId>
45+
<artifactId>guava</artifactId>
46+
</dependency>
4347
<dependency>
4448
<groupId>org.jblas</groupId>
4549
<artifactId>jblas</artifactId>

mllib/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
<directory>../python</directory>
126126
<includes>
127127
<include>pyspark/mllib/*.py</include>
128+
<include>pyspark/mllib/stat/*.py</include>
128129
<include>pyspark/ml/*.py</include>
129130
<include>pyspark/ml/param/*.py</include>
130131
</includes>

mllib/src/main/scala/org/apache/spark/ml/Transformer.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.spark.Logging
2323
import org.apache.spark.annotation.AlphaComponent
2424
import org.apache.spark.ml.param._
2525
import org.apache.spark.sql.DataFrame
26-
import org.apache.spark.sql.api.scala.dsl._
26+
import org.apache.spark.sql.Dsl._
2727
import org.apache.spark.sql.types._
2828

2929
/**

mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
2424
import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT}
2525
import org.apache.spark.mllib.regression.LabeledPoint
2626
import org.apache.spark.sql._
27-
import org.apache.spark.sql.api.scala.dsl._
27+
import org.apache.spark.sql.Dsl._
2828
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
2929
import org.apache.spark.storage.StorageLevel
3030

mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.spark.ml.param._
2323
import org.apache.spark.mllib.feature
2424
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
2525
import org.apache.spark.sql._
26-
import org.apache.spark.sql.api.scala.dsl._
26+
import org.apache.spark.sql.Dsl._
2727
import org.apache.spark.sql.types.{StructField, StructType}
2828

2929
/**

mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import org.apache.spark.ml.{Estimator, Model}
3333
import org.apache.spark.ml.param._
3434
import org.apache.spark.rdd.RDD
3535
import org.apache.spark.sql.DataFrame
36-
import org.apache.spark.sql.api.scala.dsl._
36+
import org.apache.spark.sql.Dsl._
3737
import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType}
3838
import org.apache.spark.util.Utils
3939
import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter}
@@ -536,7 +536,7 @@ object ALS extends Logging {
536536

537537
/** Builds a [[RatingBlock]]. */
538538
def build(): RatingBlock[ID] = {
539-
new RatingBlock[ID](srcIds.result(), dstIds.result(), ratings.result())
539+
RatingBlock[ID](srcIds.result(), dstIds.result(), ratings.result())
540540
}
541541
}
542542

python/pyspark/mllib/stat/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
"""
19+
Python package for statistical functions in MLlib.
20+
"""
21+
22+
from pyspark.mllib.stat._statistics import *
23+
24+
__all__ = ["Statistics", "MultivariateStatisticalSummary"]

python/pyspark/mllib/stat.py renamed to python/pyspark/mllib/stat/_statistics.py

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,14 @@
1515
# limitations under the License.
1616
#
1717

18-
"""
19-
Python package for statistical functions in MLlib.
20-
"""
21-
2218
from pyspark import RDD
2319
from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
2420
from pyspark.mllib.linalg import Matrix, _convert_to_vector
2521
from pyspark.mllib.regression import LabeledPoint
22+
from pyspark.mllib.stat.test import ChiSqTestResult
2623

2724

28-
__all__ = ['MultivariateStatisticalSummary', 'ChiSqTestResult', 'Statistics']
25+
__all__ = ['MultivariateStatisticalSummary', 'Statistics']
2926

3027

3128
class MultivariateStatisticalSummary(JavaModelWrapper):
@@ -53,54 +50,6 @@ def min(self):
5350
return self.call("min").toArray()
5451

5552

56-
class ChiSqTestResult(JavaModelWrapper):
57-
"""
58-
.. note:: Experimental
59-
60-
Object containing the test results for the chi-squared hypothesis test.
61-
"""
62-
@property
63-
def method(self):
64-
"""
65-
Name of the test method
66-
"""
67-
return self._java_model.method()
68-
69-
@property
70-
def pValue(self):
71-
"""
72-
The probability of obtaining a test statistic result at least as
73-
extreme as the one that was actually observed, assuming that the
74-
null hypothesis is true.
75-
"""
76-
return self._java_model.pValue()
77-
78-
@property
79-
def degreesOfFreedom(self):
80-
"""
81-
Returns the degree(s) of freedom of the hypothesis test.
82-
Return type should be Number(e.g. Int, Double) or tuples of Numbers.
83-
"""
84-
return self._java_model.degreesOfFreedom()
85-
86-
@property
87-
def statistic(self):
88-
"""
89-
Test statistic.
90-
"""
91-
return self._java_model.statistic()
92-
93-
@property
94-
def nullHypothesis(self):
95-
"""
96-
Null hypothesis of the test.
97-
"""
98-
return self._java_model.nullHypothesis()
99-
100-
def __str__(self):
101-
return self._java_model.toString()
102-
103-
10453
class Statistics(object):
10554

10655
@staticmethod

0 commit comments

Comments (0)