Skip to content

Commit 96072c3

Browse files
committed
merge master
2 parents 135f741 + 0bb15f2 commit 96072c3

File tree

41 files changed

+309
-222
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+309
-222
lines changed

core/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
<name>Spark Project Core</name>
3535
<url>http://spark.apache.org/</url>
3636
<dependencies>
37+
<dependency>
38+
<groupId>com.google.guava</groupId>
39+
<artifactId>guava</artifactId>
40+
</dependency>
3741
<dependency>
3842
<groupId>com.twitter</groupId>
3943
<artifactId>chill_${scala.binary.version}</artifactId>

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql
1919

2020
import org.apache.spark.{SparkConf, SparkContext}
2121
import org.apache.spark.sql.SQLContext
22-
import org.apache.spark.sql.api.scala.dsl._
22+
import org.apache.spark.sql.Dsl._
2323

2424
// One method for defining the schema of an RDD is to make a case class with the desired column
2525
// names and types.

graphx/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@
4040
<artifactId>spark-core_${scala.binary.version}</artifactId>
4141
<version>${project.version}</version>
4242
</dependency>
43+
<dependency>
44+
<groupId>com.google.guava</groupId>
45+
<artifactId>guava</artifactId>
46+
</dependency>
4347
<dependency>
4448
<groupId>org.jblas</groupId>
4549
<artifactId>jblas</artifactId>

mllib/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
<directory>../python</directory>
126126
<includes>
127127
<include>pyspark/mllib/*.py</include>
128+
<include>pyspark/mllib/stat/*.py</include>
128129
<include>pyspark/ml/*.py</include>
129130
<include>pyspark/ml/param/*.py</include>
130131
</includes>

mllib/src/main/scala/org/apache/spark/ml/Transformer.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.spark.Logging
2323
import org.apache.spark.annotation.AlphaComponent
2424
import org.apache.spark.ml.param._
2525
import org.apache.spark.sql.DataFrame
26-
import org.apache.spark.sql.api.scala.dsl._
26+
import org.apache.spark.sql.Dsl._
2727
import org.apache.spark.sql.types._
2828

2929
/**

mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
2424
import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT}
2525
import org.apache.spark.mllib.regression.LabeledPoint
2626
import org.apache.spark.sql._
27-
import org.apache.spark.sql.api.scala.dsl._
27+
import org.apache.spark.sql.Dsl._
2828
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
2929
import org.apache.spark.storage.StorageLevel
3030

mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.spark.ml.param._
2323
import org.apache.spark.mllib.feature
2424
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
2525
import org.apache.spark.sql._
26-
import org.apache.spark.sql.api.scala.dsl._
26+
import org.apache.spark.sql.Dsl._
2727
import org.apache.spark.sql.types.{StructField, StructType}
2828

2929
/**

mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import org.apache.spark.ml.{Estimator, Model}
3333
import org.apache.spark.ml.param._
3434
import org.apache.spark.rdd.RDD
3535
import org.apache.spark.sql.DataFrame
36-
import org.apache.spark.sql.api.scala.dsl._
36+
import org.apache.spark.sql.Dsl._
3737
import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType}
3838
import org.apache.spark.util.Utils
3939
import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter}
@@ -536,7 +536,7 @@ object ALS extends Logging {
536536

537537
/** Builds a [[RatingBlock]]. */
538538
def build(): RatingBlock[ID] = {
539-
new RatingBlock[ID](srcIds.result(), dstIds.result(), ratings.result())
539+
RatingBlock[ID](srcIds.result(), dstIds.result(), ratings.result())
540540
}
541541
}
542542

python/pyspark/mllib/stat/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
"""
19+
Python package for statistical functions in MLlib.
20+
"""
21+
22+
from pyspark.mllib.stat._statistics import *
23+
24+
__all__ = ["Statistics", "MultivariateStatisticalSummary"]

python/pyspark/mllib/stat.py renamed to python/pyspark/mllib/stat/_statistics.py

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,14 @@
1515
# limitations under the License.
1616
#
1717

18-
"""
19-
Python package for statistical functions in MLlib.
20-
"""
21-
2218
from pyspark import RDD
2319
from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
2420
from pyspark.mllib.linalg import Matrix, _convert_to_vector
2521
from pyspark.mllib.regression import LabeledPoint
22+
from pyspark.mllib.stat.test import ChiSqTestResult
2623

2724

28-
__all__ = ['MultivariateStatisticalSummary', 'ChiSqTestResult', 'Statistics']
25+
__all__ = ['MultivariateStatisticalSummary', 'Statistics']
2926

3027

3128
class MultivariateStatisticalSummary(JavaModelWrapper):
@@ -53,54 +50,6 @@ def min(self):
5350
return self.call("min").toArray()
5451

5552

56-
class ChiSqTestResult(JavaModelWrapper):
57-
"""
58-
.. note:: Experimental
59-
60-
Object containing the test results for the chi-squared hypothesis test.
61-
"""
62-
@property
63-
def method(self):
64-
"""
65-
Name of the test method
66-
"""
67-
return self._java_model.method()
68-
69-
@property
70-
def pValue(self):
71-
"""
72-
The probability of obtaining a test statistic result at least as
73-
extreme as the one that was actually observed, assuming that the
74-
null hypothesis is true.
75-
"""
76-
return self._java_model.pValue()
77-
78-
@property
79-
def degreesOfFreedom(self):
80-
"""
81-
Returns the degree(s) of freedom of the hypothesis test.
82-
Return type should be Number(e.g. Int, Double) or tuples of Numbers.
83-
"""
84-
return self._java_model.degreesOfFreedom()
85-
86-
@property
87-
def statistic(self):
88-
"""
89-
Test statistic.
90-
"""
91-
return self._java_model.statistic()
92-
93-
@property
94-
def nullHypothesis(self):
95-
"""
96-
Null hypothesis of the test.
97-
"""
98-
return self._java_model.nullHypothesis()
99-
100-
def __str__(self):
101-
return self._java_model.toString()
102-
103-
10453
class Statistics(object):
10554

10655
@staticmethod

0 commit comments

Comments (0)